Skip to content
This repository was archived by the owner on Apr 24, 2018. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 35 additions & 20 deletions data/dataIO.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,13 @@ const _log_ = require('../utils.js')._log_
const giveConfig = require('../src.js').config

const dataIO = {}
module.exports = dataIO
const dataIOTests = {
convertToJson
}
module.exports = {
dataIO,
dataIOTests
}

const url = require('url')
const parsedStatsdUrl = url.parse(process.env.STATSD_URL, true, true)
Expand Down Expand Up @@ -45,40 +51,49 @@ dataIO.get = (srcName, srcType, srcLink, newJSON, oldJSON) => {
fs.ensureFileSync(oldJSON)

// Save old data
fs.copySync(newJSON, oldJSON)
fs.renameSync(newJSON, oldJSON)

statsClient.increment('eventsparser.get')

// Get data from source
const res = request('GET', srcLink).getBody()
const json = convertToJson(res, srcType)
fs.writeFileSync(newJSON, json)

const old = fs.readFileSync(oldJSON)
if (old == '') { // not rewrite to '==='
_log_(`${srcName}: INIT`)

return false
}

return true
}

switch (srcType) {
/**
* convertToJson - convert data to json.
* @param {} data - data for convert.
* @param {string} dataType - datatype.
* @returns {JSON} json analogue to original data..
*/
function convertToJson (data, dataType) {
let json
switch (dataType) {
case 'xml':
const jsonBody = xml2json.toJson(res, {'sanitize': false})
fs.writeFileSync(newJSON, jsonBody)
json = xml2json.toJson(data, {'sanitize': false})
break
case 'json':
const readableBody = JSON.stringify(JSON.parse(res))
fs.writeFileSync(newJSON, readableBody)
json = JSON.stringify(JSON.parse(data))
break
case 'rawAin':
const links = ainGetLinks(res)
const ainBody = JSON.stringify(JSON.parse(links))
fs.writeFileSync(newJSON, ainBody)
const links = ainGetLinks(data)
json = JSON.stringify(JSON.parse(links))
break
default:
_log_(`ERROR: NOT FOUND ${srcType} in dataIO.get`)
_log_(`ERROR: NOT FOUND ${dataType} in dataIO: convertToJson`)
}

const old = fs.readFileSync(oldJSON)

if (old == '') { // not rewrite to '==='
_log_(`${srcName}: INIT`)

return false
}

return true
return json
}

/**
Expand Down
17 changes: 10 additions & 7 deletions data/transform.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,14 @@ module.exports = transform
transform.title = (data) => {
const title = data
// Remove words 'free' and 'webinar'
.replace(/(бесплат|вебин|безкоштовн|вебін)[а-я]+\s/ig, '')
.replace(/((бесплат|вебин|безкоштовн|вебін)[а-я]+|free|webinars?)/ig, '')
// Remove quotation mark
.replace(/[",«,‘,“,„]+(.{0,})+[",»,’,”,“]/, '$1')
.replace(/["«»‘’“”„“]/g, '')
// Remove useless spaces
.trim().replace(/\s{2,}/g, ' ')
// Remove dots in the end
.replace(/(.{0,})(\.{1,})/, '$1')
.replace(/\.+$/g, '')


return title
}
Expand All @@ -32,16 +35,16 @@ transform.title = (data) => {
*/
transform.agenda = (data) => {
const agenda = data
// Remove word 'free'
.replace(/(бесплат|безкоштовн)[а-я]+\s/ig, '')
// Remove words 'free'
.replace(/((бесплат|безкоштовн)[а-я]+|free)\s?/ig, '')
// Remove images
.replace(/<img.+?">(<br>)?/g, '')
// Replace Header-text to Bold-text
.replace(/h[1-4]{1}(\sstyle=".{0,}")?>/g, 'b><br>')
// Remove iframes
.replace(/<p><iframe.{0,}iframe><\/p>|<iframe.{0,}iframe>/g, '')
// Replace spans to paragraph
.replace(/<span.+?>(.+?)<\/span>(<br>)?/g, '<p>$1</p>')
.replace(/<span.*?>(.+?)<\/span>/g, '<p>$1</p>')
// Replace \n to <br>
.replace(/\n/g, '<br>')
// Replace paragraph with custom line to horizontal rule
Expand Down Expand Up @@ -78,7 +81,7 @@ transform.place = (data) => {
.replace(/(Украина|Україна|Ukraine)(,\s)?/, '')
// Remove 'Kyiv' from the field
.replace(/(Киев|Київ|Kyiv|Kiev)(,\s)?/, '')
// Remove html tags // FIX ME: It looks like a crutch. Need rewrite.
// Remove html tags
.replace(/(.*?)<.+?>(.+?)/g, '$1$2')

return place
Expand Down
2 changes: 1 addition & 1 deletion main.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ const path = require('path')

const parse = require('./data/parse')
const inBlackList = require('./data/blackList').inBlackList
const dataIO = require('./data/dataIO')
const { dataIO } = require('./data/dataIO')
const transform = require('./data/transform')
const src = require('./src')

Expand Down
67 changes: 67 additions & 0 deletions tests/data/dataIO.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
'use strict'

const assert = require('assert')
const fs = require('fs-extra')
const { dataIO, dataIOTests } = require('../../data/dataIO.js')
const util = require('util')
const links = require(`${__dirname}/../fake_data/link/links.js`).links

// Root directory of the fixture files used by the checks below.
const path = `${__dirname}/../fake_data`
// Data types passed to the convertToJson check.
const types = ['xml', 'json', 'rawAin']
// Source names passed to the read and link checks.
const sources = ['dou', 'meetup', 'ain', 'fb', 'bigCityEvent']

/**
 * Check dataIO.get: after the first call both JSON files must exist and the
 * "old" one must be empty (INIT state); after the second call the "old" file
 * must carry the mtime that the "new" file had after the first call.
 *
 * The files are created under `${path}/json/`, and that directory is removed
 * afterwards even when an assertion fails, so a broken run cannot leave stale
 * files behind that would make the next run's INIT check fail.
 *
 * NOTE(review): srcLink is a real URL, so dataIO.get presumably goes to the
 * network here — confirm that is acceptable for this test.
 *
 * @throws {AssertionError} when one of the expectations is not met.
 */
function get () {
  const srcName = 'testSrcName'
  const srcLink = 'https://google.com.ua'
  const srcType = 'testSrcType'
  const jsonDir = `${path}/json/`
  const newJSON = `${jsonDir}new.json`
  const oldJSON = `${jsonDir}old.json`

  try {
    dataIO.get(srcName, srcType, srcLink, newJSON, oldJSON)

    const statsNew = fs.statSync(newJSON)
    const statsOld = fs.statSync(oldJSON)
    assert(statsNew.isFile(), 'Not created newJSON file')
    assert(statsOld.isFile(), 'Not created oldJSON file')
    assert(statsOld.size === 0, 'Something wrong in INITing')

    dataIO.get(srcName, srcType, srcLink, newJSON, oldJSON)

    // The old file is expected to be the previous new file, so its mtime has
    // to match the one recorded after the first call.
    const statsNewOld = fs.statSync(oldJSON)
    assert.strictEqual(statsNew.mtime.toString(), statsNewOld.mtime.toString(), 'Not copied old file')
  } finally {
    // Remove test data. fs-extra's removeSync deletes the directory with its
    // contents and is a no-op when the path does not exist, so it is safe to
    // call no matter how far the checks above got.
    fs.removeSync(jsonDir)
  }
}

/**
 * Check dataIOTests.convertToJson against the stored fixtures: for every type
 * the conversion of `${path}/convert.<type>` must be strictly equal to the
 * text of `${path}/<type>.json`.
 *
 * @param {string[]} types - data types to check.
 * @throws {AssertionError} when a conversion differs from its fixture.
 */
function convertToJson (types) {
  const checkType = (type) => {
    const rawInput = fs.readFileSync(`${path}/convert.${type}`)
    const actual = dataIOTests.convertToJson(rawInput, type)
    const expected = fs.readFileSync(`${path}/${type}.json`).toString()
    assert.strictEqual(actual, expected)
  }

  types.forEach((type) => checkType(type))
}

/**
 * Check dataIO.read: for every source the result of reading
 * `${path}/read/<source>.json`, rendered with util.inspect at unlimited
 * depth, must be strictly equal to the text of `${path}/read/<source>Read`.
 *
 * @param {string[]} sources - source names to check.
 * @throws {AssertionError} when a rendered result differs from its fixture.
 */
function read (sources) {
  const checkSource = (source) => {
    const parsed = dataIO.read(source, `${path}/read/${source}.json`)
    const actual = util.inspect(parsed, 0, null)
    const expected = fs.readFileSync(`${path}/read/${source}Read`).toString()
    assert.strictEqual(actual, expected)
  }

  sources.forEach((source) => checkSource(source))
}

/**
 * Check dataIO.link: for every source the value returned for the parsed
 * `${path}/link/<source>.json` file must be strictly equal to the expected
 * entry in `links`.
 *
 * @param {string[]} sources - source names to check.
 * @throws {AssertionError} when a returned link differs from the expected one.
 */
function link (sources) {
  const checkSource = (source) => {
    const parsedFile = fs.readJsonSync(`${path}/link/${source}.json`)
    const actual = dataIO.link(source, parsedFile, [0])
    assert.strictEqual(actual, links[source])
  }

  sources.forEach((source) => checkSource(source))
}

// Run the checks in order; a failed assertion throws and aborts the script.
get()
convertToJson(types)
read(sources)
link(sources)
Loading