Merge branch 'master' into ignore-user-blank

This commit is contained in:
Kim Gardner
2018-03-08 13:44:07 -05:00
committed by GitHub
7 changed files with 27 additions and 30 deletions
+1 -1
View File
@@ -52,7 +52,7 @@ class Context {
this.id = ctx.id || uuid.v4();
// Attach a logger or create one.
this.log = ctx.log || createLogger('context', this.id);
this.log = ctx.log || createLogger('graph:context', this.id);
// Load the currently logged-in user into `user`; otherwise this will be null.
this.user = get(ctx, 'user');
+1 -1
View File
@@ -119,7 +119,7 @@ const findOrCreateAssetByURL = async (ctx, url) => {
// If this is a new asset, then we need to scrape it!
if (!asset.scraped) {
// Create the Scraper job.
await Scraper.create(asset);
await Scraper.create(ctx, asset.id);
}
return asset;
+2 -2
View File
@@ -63,9 +63,9 @@ const closeNow = async (ctx, id) =>
* @param {String} id the asset's id to scrape
*/
const scrapeAsset = async (ctx, id) => {
const { services: { Scraper } } = ctx;
const { connectors: { services: { Scraper } } } = ctx;
return Scraper.create({ id });
return Scraper.create(ctx, id);
};
module.exports = ctx => {
+1 -1
View File
@@ -1,4 +1,4 @@
const jobs = [require('./mailer')];
const jobs = [require('./mailer'), require('./scraper')];
const process = () => jobs.forEach(job => job());
+11 -14
View File
@@ -1,7 +1,8 @@
const Asset = require('../models/asset');
const scraper = require('../services/scraper');
const Assets = require('../services/assets');
const debug = require('debug')('talk:jobs:scraper');
const { createLogger } = require('../services/logging');
const logger = createLogger('jobs:scraper');
const metascraper = require('metascraper');
/**
@@ -39,38 +40,34 @@ function update(id, meta) {
}
module.exports = () => {
debug(`Now processing ${scraper.task.name} jobs`);
logger.info({ taskName: scraper.task.name }, 'Now processing jobs');
scraper.task.process(async (job, done) => {
debug(`Starting on Job[${job.id}] for Asset[${job.data.asset_id}]`);
const { id, asset_id } = job.data;
const log = logger.child({ traceID: id, jobID: job.id, assetID: asset_id });
log.info('Starting scrape');
try {
// Find the asset, or complain that it doesn't exist.
const asset = await Assets.findById(job.data.asset_id);
if (!asset) {
return done(new Error('asset not found'));
throw new Error('asset not found');
}
// Scrape the metadata from the asset.
const meta = await scrape(asset);
debug(
`Scraped ${JSON.stringify(meta)} on Job[${job.id}] for Asset[${
job.data.asset_id
}]`
);
log.info('Finished scraping');
// Assign the metadata retrieved for the asset to the db.
await update(job.data.asset_id, meta);
} catch (err) {
debug(
`Failed to scrape on Job[${job.id}] for Asset[${job.data.asset_id}]:`,
err
);
log.error({ err }, 'Failed to scrape');
return done(err);
}
debug(`Finished on Job[${job.id}] for Asset[${job.data.asset_id}]`);
log.info('Finished updating');
done();
});
};
+5 -5
View File
@@ -1,17 +1,17 @@
const { version } = require('../package.json');
const Logger = require('bunyan');
const uuid = require('uuid/v1');
const { LOGGING_LEVEL } = require('../config');
const { LOGGING_LEVEL, REVISION_HASH } = require('../config');
// Create the logging instance that all loggers are branched from.
function createLogger(name, id = uuid()) {
function createLogger(name, traceID) {
return new Logger({
src: true,
name,
id,
traceID,
version,
revision: REVISION_HASH,
level: LOGGING_LEVEL,
serializers: { req: Logger.stdSerializers.req },
serializers: Logger.stdSerializers,
});
}
+6 -6
View File
@@ -1,5 +1,4 @@
const kue = require('./kue');
const debug = require('debug')('talk:services:scraper');
/**
* Exposes a service object to allow operations to execute against the scraper.
@@ -16,15 +15,16 @@ const scraper = {
/**
* Creates a new scraper job and scrapes the url when it gets processed.
*/
async create(asset) {
debug(`Creating job for Asset[${asset.id}]`);
async create(ctx, id) {
ctx.log.info({ assetID: id }, 'Creating job');
const job = await scraper.task.create({
title: `Scrape for asset ${asset.id}`,
asset_id: asset.id,
title: `Scrape for asset ${id}`,
id: ctx.id,
asset_id: id,
});
debug(`Created Job[${job.id}] for Asset[${asset.id}]`);
ctx.log.info({ jobID: job.id, assetID: id }, 'Created job');
return job;
},