mirror of
https://github.com/wassname/talk.git
synced 2026-07-02 14:32:55 +08:00
Merge branch 'master' into ignore-user-blank
This commit is contained in:
+1
-1
@@ -52,7 +52,7 @@ class Context {
|
||||
this.id = ctx.id || uuid.v4();
|
||||
|
||||
// Attach a logger or create one.
|
||||
this.log = ctx.log || createLogger('context', this.id);
|
||||
this.log = ctx.log || createLogger('graph:context', this.id);
|
||||
|
||||
// Load the current logged in user to `user`, otherwise this will be null.
|
||||
this.user = get(ctx, 'user');
|
||||
|
||||
@@ -119,7 +119,7 @@ const findOrCreateAssetByURL = async (ctx, url) => {
|
||||
// If this is a new asset, then we need to scrape it!
|
||||
if (!asset.scraped) {
|
||||
// Create the Scraper job.
|
||||
await Scraper.create(asset);
|
||||
await Scraper.create(ctx, asset.id);
|
||||
}
|
||||
|
||||
return asset;
|
||||
|
||||
@@ -63,9 +63,9 @@ const closeNow = async (ctx, id) =>
|
||||
* @param {String} id the asset's id to scrape
|
||||
*/
|
||||
const scrapeAsset = async (ctx, id) => {
|
||||
const { services: { Scraper } } = ctx;
|
||||
const { connectors: { services: { Scraper } } } = ctx;
|
||||
|
||||
return Scraper.create({ id });
|
||||
return Scraper.create(ctx, id);
|
||||
};
|
||||
|
||||
module.exports = ctx => {
|
||||
|
||||
+1
-1
@@ -1,4 +1,4 @@
|
||||
const jobs = [require('./mailer')];
|
||||
const jobs = [require('./mailer'), require('./scraper')];
|
||||
|
||||
const process = () => jobs.forEach(job => job());
|
||||
|
||||
|
||||
+11
-14
@@ -1,7 +1,8 @@
|
||||
const Asset = require('../models/asset');
|
||||
const scraper = require('../services/scraper');
|
||||
const Assets = require('../services/assets');
|
||||
const debug = require('debug')('talk:jobs:scraper');
|
||||
const { createLogger } = require('../services/logging');
|
||||
const logger = createLogger('jobs:scraper');
|
||||
const metascraper = require('metascraper');
|
||||
|
||||
/**
|
||||
@@ -39,38 +40,34 @@ function update(id, meta) {
|
||||
}
|
||||
|
||||
module.exports = () => {
|
||||
debug(`Now processing ${scraper.task.name} jobs`);
|
||||
logger.info({ taskName: scraper.task.name }, 'Now processing jobs');
|
||||
|
||||
scraper.task.process(async (job, done) => {
|
||||
debug(`Starting on Job[${job.id}] for Asset[${job.data.asset_id}]`);
|
||||
const { id, asset_id } = job.data;
|
||||
|
||||
const log = logger.child({ traceID: id, jobID: job.id, assetID: asset_id });
|
||||
log.info('Starting scrape');
|
||||
|
||||
try {
|
||||
// Find the asset, or complain that it doesn't exist.
|
||||
const asset = await Assets.findById(job.data.asset_id);
|
||||
if (!asset) {
|
||||
return done(new Error('asset not found'));
|
||||
throw new Error('asset not found');
|
||||
}
|
||||
|
||||
// Scrape the metadata from the asset.
|
||||
const meta = await scrape(asset);
|
||||
|
||||
debug(
|
||||
`Scraped ${JSON.stringify(meta)} on Job[${job.id}] for Asset[${
|
||||
job.data.asset_id
|
||||
}]`
|
||||
);
|
||||
log.info('Finished scraping');
|
||||
|
||||
// Assign the metadata retrieved for the asset to the db.
|
||||
await update(job.data.asset_id, meta);
|
||||
} catch (err) {
|
||||
debug(
|
||||
`Failed to scrape on Job[${job.id}] for Asset[${job.data.asset_id}]:`,
|
||||
err
|
||||
);
|
||||
log.error({ err }, 'Failed to scrape');
|
||||
return done(err);
|
||||
}
|
||||
|
||||
debug(`Finished on Job[${job.id}] for Asset[${job.data.asset_id}]`);
|
||||
log.info('Finished updating');
|
||||
done();
|
||||
});
|
||||
};
|
||||
|
||||
+5
-5
@@ -1,17 +1,17 @@
|
||||
const { version } = require('../package.json');
|
||||
const Logger = require('bunyan');
|
||||
const uuid = require('uuid/v1');
|
||||
const { LOGGING_LEVEL } = require('../config');
|
||||
const { LOGGING_LEVEL, REVISION_HASH } = require('../config');
|
||||
|
||||
// Create the logging instance that all loggers are branched from.
|
||||
function createLogger(name, id = uuid()) {
|
||||
function createLogger(name, traceID) {
|
||||
return new Logger({
|
||||
src: true,
|
||||
name,
|
||||
id,
|
||||
traceID,
|
||||
version,
|
||||
revision: REVISION_HASH,
|
||||
level: LOGGING_LEVEL,
|
||||
serializers: { req: Logger.stdSerializers.req },
|
||||
serializers: Logger.stdSerializers,
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
+6
-6
@@ -1,5 +1,4 @@
|
||||
const kue = require('./kue');
|
||||
const debug = require('debug')('talk:services:scraper');
|
||||
|
||||
/**
|
||||
* Exposes a service object to allow operations to execute against the scraper.
|
||||
@@ -16,15 +15,16 @@ const scraper = {
|
||||
/**
|
||||
* Creates a new scraper job and scrapes the url when it gets processed.
|
||||
*/
|
||||
async create(asset) {
|
||||
debug(`Creating job for Asset[${asset.id}]`);
|
||||
async create(ctx, id) {
|
||||
ctx.log.info({ assetID: id }, 'Creating job');
|
||||
|
||||
const job = await scraper.task.create({
|
||||
title: `Scrape for asset ${asset.id}`,
|
||||
asset_id: asset.id,
|
||||
title: `Scrape for asset ${id}`,
|
||||
id: ctx.id,
|
||||
asset_id: id,
|
||||
});
|
||||
|
||||
debug(`Created Job[${job.id}] for Asset[${asset.id}]`);
|
||||
ctx.log.info({ jobID: job.id, assetID: id }, 'Created job');
|
||||
|
||||
return job;
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user