使用puppeteer操作headless chrome获取coincheck聊天版内容

交易所的聊天版是涨跌信息材料的重要来源,这次我们用puppeteer来获取coincheck的聊天版信息。

puppeteer可以很方便操作headless-chrome来爬取网页,另外可以很方便监听页面上的各种事件,ajax和websocket也不需要担心跨域问题。

经过调查发现,coincheck的聊天版通过定时调用ajax获取聊天信息更新。获取这个ajax的内容使用puppeteer非常简单。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
const puppeteer = require('puppeteer');

/**
 * Opens the coincheck chat page in headless Chrome and prints every chat
 * message found in the JSON responses the page receives.
 *
 * The browser is intentionally left open on success: the script keeps
 * listening for responses for as long as the process runs.
 */
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();

    // Attach the listener before navigating so that responses received
    // during the initial page load are not missed.
    page.on('response', async (response) => {
      let textBody;
      try {
        textBody = await response.text();
      } catch (err) {
        // Some responses have no readable body; skip them instead of
        // leaving an unhandled promise rejection.
        return;
      }

      let chats;
      try {
        ({ chats } = JSON.parse(textBody));
      } catch (err) {
        // Not a JSON response (HTML, images, scripts, ...): nothing to print.
        return;
      }

      // Only payloads carrying a `chats` array are of interest.
      if (!Array.isArray(chats)) {
        return;
      }
      for (const chat of chats) {
        console.log(`${chat.name}: ${chat.content}`);
      }
    });

    await page.goto('https://coincheck.com/ja/chats');
  } catch (err) {
    // Do not leave a headless Chrome process behind when setup fails.
    await browser.close();
    throw err;
  }
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});

为了大家方便使用上传github了。
自取

How to turn a function with a callback into a promise?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
// function fetchURL(url, callback);
// function parsePage(content, callback);

/**
 * Promise-returning wrapper around the callback-based `fetchURL`.
 * Note: at this scope the name shadows the global `fetch`.
 *
 * @param {string} url - Address handed to `fetchURL`.
 * @returns {Promise<*>} Resolves with the value `fetchURL` passes to its
 *   callback; rejects with the error it reports.
 */
const fetch = (url) => new Promise((resolve, reject) => {
  fetchURL(url, (err, result) => {
    if (err) {
      reject(err);
      return;
    }
    resolve(result);
  });
});

/**
 * Promise-returning wrapper around the callback-based `parsePage`.
 *
 * @param {*} content - Content handed to `parsePage`.
 * @returns {Promise<*>} Resolves with the value `parsePage` passes to its
 *   callback; rejects with the error it reports.
 */
const parse = (content) => new Promise((resolve, reject) => {
  parsePage(content, (err, result) => {
    if (err) {
      reject(err);
      return;
    }
    resolve(result);
  });
});

then instead of calling

1
2
3
4
5
// Callback style: the page is parsed inside the fetch callback, so every
// additional step adds another level of nesting.
fetchURL("https://google.com", function onFetched(fetchErr, content) {
  parsePage(content, function onParsed(parseErr, result) {
    //do something here
  });
});

we can write something like

1
2
3
4
5
6
// Promise style: each step returns a promise, so the steps are chained
// one after another instead of being nested.
fetch("https://google.com")
  .then(function parseFetched(pageContent) {
    return parse(pageContent);
  })
  .then(function useParsed(parsedResult) {
    //do something with the result;
  });

then callback hell is turned into promise chain.

promisify

There is a general way to turn a callback-based function into one that returns a promise.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
/**
 * Wraps a Node-style callback API (`fn(...args, (err, ...results) => {})`)
 * into a function that returns a promise.
 *
 * @param {Function} callbackBasedApi - Function taking an error-first
 *   callback as its last argument.
 * @returns {Function} `promisified(...args)`, returning a promise that
 *   rejects with `err` when it is truthy, resolves with the single result
 *   when the callback receives at most one result, and resolves with an
 *   array of all results otherwise.
 */
module.exports.promisify = function (callbackBasedApi) {
  return function promisified(...args) {
    return new Promise((resolve, reject) => {
      // Collect the callback's own results with a rest parameter: an arrow
      // function has no `arguments` of its own, so `arguments` here would
      // refer to the arguments of `promisified` instead.
      const callback = (err, ...results) => {
        if (err) {
          reject(err);
          return;
        }
        resolve(results.length <= 1 ? results[0] : results);
      };
      callbackBasedApi.apply(null, [...args, callback]);
    });
  };
};

We create a function called promisified, which returns a promise.

In the promise executor function, we append a callback to the arguments passed to promisified and call callbackBasedApi with the extended argument list. The callback we appended uses resolve and reject to settle the promise.

Sequential Iteration

1
2
3
4
5
6
7
8
9
10
let tasks = [ /* ... */ ];

// Start from an already-resolved promise and append one `then` per task,
// so each task is called only after the previous one has finished.
let promise = Promise.resolve();
for (const runTask of tasks) {
  promise = promise.then(() => runTask());
}

promise.then(function onAllDone() {
  //All tasks completed
});

or instead of forEach, use reduce.

1
2
3
4
5
6
7
8
9
10
11
let tasks = [ /* ... */ ];

// Fold the task list into a single promise chain, seeded with an
// already-resolved promise.
let promise = tasks.reduce(
  (chain, runTask) => chain.then(() => runTask()),
  Promise.resolve(),
);

promise.then(function onAllDone() {
  //All tasks completed
});

Both use Promise.resolve() to kick off the first iteration.

Parallel execution

1
2
3
4
5
6
let promises = [ /* promises */ ];

// Promise.all fulfills once every promise in the list has fulfilled.
Promise.all(promises).then(function onAllFulfilled() {
  //All tasks completed
});
Javascript

Ajax with Phoenix API

备忘一下Ajax Post的用法。
注意点是以前Ajax相当于在浏览器中调用API,所以发起请求时候会带上Cookie。
另外同时需要带上csrf token。

1
2
3
4
5
6
7
8
9
10
11
// POST a new tag as JSON; the CSRF token travels inside the JSON body.
$.ajax({
  url: "/tags",
  method: "POST",
  contentType: "application/json; charset=utf-8",
  dataType: "json",
  data: JSON.stringify({
    tag: { name: "John" },
    _csrf_token: "IA19DUIPbQcJFURhdTo5fD4EOW49NgAAKJ7Y+H9cOb1QFWU+Xss/Uw==",
  }),
}).done(function onSaved(msg) {
  alert("Data Saved: " + msg);
});

Api for both promise or callback

如果没有callback参数就直接resolve或者reject,如果有callback参数就先执行callback,然后再resolve或者reject。于是我们看到,当我们既带上callback,又用then去chain promise的时候,结果就返回了两次。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16

// divid.js
module.exports = function asyncDivision (dividend, divisor, cb) {
return new Promise((resolve, reject) => {
process.nextTick(() => {
const result = dividend / divisor;
if (isNaN(result) || !Number.isFinite(result)) {
const error = new Error('Invalid operands');
if (cb) { cb(error); }
return reject(error);
}
if (cb) { cb(null, result); }
resolve(result);
});
});
};

1
2
3
4
5
6
7
8
9
10
11
12

const asyncDivision = require('./divid');

// Prints a successful result to stdout.
const printResult = (result) => console.log(result);
// Prints a failure to stderr.
const printError = (error) => console.error(error);

// The same call is consumed twice: once through the error-first callback
// and once through the returned promise.
asyncDivision(10, 2, function onDivided(error, result) {
  if (error) {
    return printError(error);
  }
  printResult(result);
})
  .then(printResult) // promise success path
  .catch(printError); // promise failure path
1
2
3
4
//result
☁ 5:54PM node_design_pattern node test.js
5
5