This repository was archived by the owner on Nov 10, 2023. It is now read-only.

Commit 52b8dcc

author: kalise
commit message: first commit
0 parents, commit 52b8dcc

File tree: 1,101 files changed, +785,738 −0 lines


.gitignore (+1 line)

.DS_Store

README.md (+166 lines)

# wscraper

wscraper.js is a web scraper agent written in node.js and based on [cheerio.js][0], a fast, flexible, and lean implementation of core jQuery.
It is built on top of [request.js][1] and inspired by [http-agent.js][2].

## Usage

There are two ways to use wscraper: HTTP agent mode and local mode.

### HTTP Agent mode

In HTTP agent mode, pass the agent a host, a list of URLs to visit, and a scraping JS script. For each URL, the agent makes a request, gets the response, runs the scraping script, and returns the result of the scraping. Valid usage is:

```js
// scrape a single page from a web site
var agent = wscraper.createAgent();
agent.start('google.com', '/finance', script);

// scrape multiple pages from a website
agent.start('google.com', ['/', '/finance', '/news'], script);
```
The URLs should be passed as an array of strings. If only one page needs to be scraped, the URL can be passed as a single string. Null or empty URLs are treated as the root '/'. Suppose you want to scrape the stock prices of Apple, Cisco, and Microsoft from the http://google.com/finance website.
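The path handling described above (leading slash optional, null or empty falling back to the root) can be sketched as a plain function; `toUrl` below is a hypothetical helper mirroring what the agent does internally, not part of the wscraper API:

```javascript
// Hypothetical helper mirroring the agent's internal path-to-URL normalization:
// a missing leading '/' is inserted, and null/empty paths fall back to root '/'.
function toUrl(host, path) {
  path = path || '/'; // null or empty URLs are treated as root '/'
  if (path.indexOf('/') === 0) {
    return 'http://' + host + path;
  }
  return 'http://' + host + '/' + path;
}

console.log(toUrl('google.com', '/finance')); // http://google.com/finance
console.log(toUrl('google.com', 'news'));     // http://google.com/news
console.log(toUrl('google.com', null));       // http://google.com/
```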

```js
// load node.js libraries
var util = require('util');
var wscraper = require('wscraper');
var fs = require('fs');

// load the scraping script from a file
var script = fs.readFileSync('/scripts/googlefinance.js', 'utf8');

var companies = ['/finance?q=apple', '/finance?q=cisco', '/finance?q=microsoft'];

// create a web scraper agent instance
var agent = wscraper.createAgent();

agent.on('start', function (n) {
  util.log('[wscraper.js] agent has started; ' + n + ' path(s) to visit');
});

agent.on('done', function (url, result) {
  util.log('[wscraper.js] data from ' + url);
  // display the results
  util.log('[wscraper.js] current stock price is ' + result.price + ' USD');
  // next item to process, if any
  agent.next();
});

agent.on('stop', function (n) {
  util.log('[wscraper.js] agent has ended; ' + n + ' path(s) remained to visit');
});

agent.on('abort', function (e) {
  util.log('[wscraper.js] getting a FATAL ERROR [' + e + ']');
  util.log('[wscraper.js] agent has aborted');
  process.exit();
});

// run the web scraper agent
agent.start('www.google.com', companies, script);
```

The scraping script should be pure client-side JavaScript, including jQuery selectors; see [cheerio.js][0] for details. It should return a valid JavaScript object.
The scraping script is passed as a string and is usually read from a file. You can scrape different websites without changing a line of the main code: just write different JavaScript scripts.
The scraping script is executed in a sandbox, using a separate VM context, so script errors are caught without crashing the main code.

At the time of writing, the google.com/finance website reports financial data of public companies as in the following HTML snippet:

```html
...
<div id="price-panel" class="id-price-panel goog-inline-block">
  <div>
    <span class="pr">
      <span id="ref_22144_l">656.06</span>
    </span>
  </div>
</div>
...
```
Using jQuery selectors, we design the scraping script "googlefinance.js" to find the current value of a company's stock and return it as text:

```js
/*
  googlefinance.js

  $ -> the DOM document to be parsed
  result -> the object containing the result of the parsing
*/

result = {};
price = $('div.id-price-panel').find('span.pr').children().text();
result.price = price;

// result.price is '656.06'
```

### Local mode

Sometimes you need to scrape local HTML files without making a request to a remote server. Wscraper can be used as an inline scraper: it takes an HTML string and a JS scraping script, runs the script, and returns the result of the scraping. Valid usage is:

```js
var scraper = wscraper.createScraper();
scraper.run(html, script);
```

As a trivial example, suppose you want to replace the class name of `<div>` elements that only contain an image with a given class. Create a scraper:
```js
// load node.js libraries
var util = require('util');
var fs = require('fs');
var wscraper = require('wscraper');

// load your html page
var html = fs.readFileSync('/index.html', 'utf8');

// load the scraping script from a file
var script = fs.readFileSync('/scripts/replace.js', 'utf8');

// create the scraper
var scraper = wscraper.createScraper();

scraper.on('done', function (result) {
  // do something with the result
  util.log(result);
});

scraper.on('abort', function (e) {
  util.log('Getting an error in parsing: ' + e);
});

// run the scraper
scraper.run(html, script);
```
Using jQuery selectors, we design the scraping script "replace.js" to find the `<div>` elements containing images with class="MyPhotos" and replace each of them with a `<div>` element having class="Hidden" and no image inside.

```js
/*
  replace.js

  $ -> the DOM document to be parsed
  result -> the object containing the result of the parsing
  (use JSON.stringify(result) if you need the result as a JSON string)
*/

result = {};
var imgs = $('img.MyPhotos').toArray();
$.each(imgs, function (index, elem) {
  var newdiv = $('<div class="Hidden"></div>');
  $(elem).parent().replaceWith(newdiv);
});

result.replaced = $.html() || '';
```

Happy scraping!

### Author: kalise © 2012, MIT Licensed

[0]: https://github.com/MatthewMueller/cheerio
[1]: https://github.com/mikeal/request
[2]: https://github.com/indexzero/http-agent

lib/wscraper.js (+165 lines)
/*
 * wscraper.js: a web scraper agent based on cheerio.js, a fast, flexible, and lean implementation of core jQuery;
 * built on top of request.js;
 * inspired by http-agent.js;
 *
 * (C) 2012 Kalise
 * MIT LICENSE
 *
 */

var fs = require('fs'),
    util = require('util'),
    EventEmitter = require('events').EventEmitter,
    vm = require('vm'),
    request = require('request'),
    cheerio = require('cheerio'),
    Iconv = require('iconv').Iconv;

exports.createAgent = function () {
  return new WebScraper();
};

var WebScraper = function () {
  EventEmitter.call(this);
  this.host = '';
  this.paths = [];
  this.script = '';
  this.sandbox = {
    $: '',      // $ -> the DOM document to be parsed
    result: {}  // result -> the object containing the result of the parsing
  };
  this.running = false;
  this.unvisited = [];
  this.options = {
    uri: '',
    method: 'GET',
    headers: { 'accept-charset': 'UTF-8', 'accept': 'text/html' },
    encoding: null
  };
};

util.inherits(WebScraper, EventEmitter);

WebScraper.prototype.start = function (host, paths, script) {
  if (!this.running) {
    this.running = true;
    this.host = host || 'localhost';
    if ((paths instanceof Array) && paths.length) {
      this.paths = paths;
    }
    if (typeof paths === 'string') {
      this.paths[0] = paths;
    }
    this.script = script || '';
    // in javascript, assigning an array or an object to a variable makes a reference to the value,
    // so we use slice(0) to make a copy of the array
    this.unvisited = this.paths.slice(0);
    this.emit('start', this.paths.length);
    this.next();
  }
  else util.log('[wscraper.js] agent is still running, use agent.stop() before starting it again');
};

WebScraper.prototype.stop = function () {
  if (this.running) {
    this.running = false;
    this.emit('stop', this.unvisited.length);
  }
  else util.log('[wscraper.js] agent is not running, use agent.start() before stopping it');
};

WebScraper.prototype.next = function () {
  if (this.running) {
    if (this.unvisited.length > 0) {
      var path = this.unvisited.shift();
      var url = '';
      if (path.indexOf('/') == 0) {
        url = 'http://' + this.host + path;
      } else {
        url = 'http://' + this.host + '/' + path;
      }
      util.log('[wscraper.js] sending a request to: ' + url);
      this.options.uri = url;
      var self = this;
      request(self.options, function (error, response, body) {
        // currently only the 200 OK status code is accepted as valid for web scraping
        // TODO: handle 3XX (redirection) status codes
        if (error || response.statusCode != 200) {
          self.emit('abort', 'error or bad response from ' + url);
          return;
        }
        var data = body || {};
        // check response.headers['content-type'] to detect the encoding used by the server
        // TODO: support all conversions supported by iconv.js
        var encoding = 'UTF-8';
        if (response.headers['content-type'].match('charset=ISO-8859-1')) {
          encoding = 'ISO-8859-1';
        }
        if (encoding != 'UTF-8') { // convert the data stream from ISO-8859-1 to UTF-8
          var iconv = new Iconv(encoding, 'UTF-8');
          data = iconv.convert(body);
        }
        // load the data in the sandbox
        self.sandbox.$ = cheerio.load(data.toString());
        try {
          // run the script in the sandbox
          vm.runInNewContext(self.script.toString(), self.sandbox);
        } catch (e) {
          self.emit('abort', e); // catch any error thrown by the script
          return;
        }
        if (self.sandbox.result) {
          self.emit('done', url, self.sandbox.result);
        } else {
          self.emit('abort', 'parsing script returned a null value!');
        }
      });
    }
    else {
      this.stop();
    }
  }
  else util.log('[wscraper.js] agent is not running, start it by calling agent.start()');
};

// use the Scraper object without making any http request
exports.createScraper = function () {
  return new Scraper();
};

var Scraper = function () {
  EventEmitter.call(this);
  this.html = '';
  this.script = '';
  this.sandbox = {
    $: '',      // $ -> the DOM document to be parsed
    result: {}  // result -> the object containing the result of the parsing
  };
};

util.inherits(Scraper, EventEmitter);

Scraper.prototype.run = function (html, script) {
  this.html = html || '';
  this.script = script || '';
  this.emit('run');
  this.sandbox.$ = cheerio.load(this.html.toString());
  // run the loaded script in a sandbox
  try {
    vm.runInNewContext(this.script.toString(), this.sandbox);
  } catch (e) {
    this.emit('abort', e);
    return;
  }
  // emit the "done" event and pass the result to the callback function
  if (this.sandbox.result) {
    this.emit('done', this.sandbox.result);
  } else {
    this.emit('abort', 'parsing script returned a null value!');
  }
};
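The `Iconv` branch in `next()` above converts ISO-8859-1 response bodies to UTF-8. For reference, modern node can sketch the same conversion with the built-in `Buffer` alone, since its 'latin1' encoding maps byte-for-byte onto U+0000–U+00FF; this is an illustration, not the library's actual code path:

```javascript
// Sketch: ISO-8859-1 (latin1) bytes -> UTF-8 bytes without the iconv addon.
var latin1Bytes = Buffer.from([0x63, 0x61, 0x66, 0xe9]); // "café" in ISO-8859-1
var text = latin1Bytes.toString('latin1');               // decode latin1 into a JS string
var utf8Bytes = Buffer.from(text, 'utf8');               // re-encode the string as UTF-8

console.log(text);             // café
console.log(utf8Bytes.length); // 5 ('é' takes two bytes in UTF-8)
```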

node_modules/cheerio/.npmignore (+6 lines, generated file not rendered)

node_modules/cheerio/.travis.yml (+4 lines, generated file not rendered)

0 commit comments