/*
  Module dependencies
*/

var path = require('path'),      // not referenced in the visible part of this file; kept as-is
    parse = require('./parse'),  // called below as parse(markup, options) to build a DOM from strings
    _ = require('lodash');       // provides the defaults/extend helpers used below

/*
 * The API
 *
 * Modules whose exported members are copied onto `Cheerio.prototype` by the
 * `api.forEach` call near the end of this file. They are applied in array
 * order, so on a name clash the later module wins.
 */

var api = [
  require('./api/attributes'),
  require('./api/traversing'),
  require('./api/manipulation'),
  require('./api/css')
];

/*
 * A simple way to check for HTML strings or ID strings
 *
 * Capture group 1: an HTML fragment (`<...>`), optionally preceded by text
 *                  containing neither `#` nor `<`, and followed by text
 *                  containing no `>`.
 * Capture group 2: the identifier of a bare `#id` string.
 */

var quickExpr = /^(?:[^#<]*(<[\w\W]+>)[^>]*$|#([\w\-]*)$)/;

/*
 * Instance of cheerio
 *
 * Works with or without `new`. Depending on `selector` the call yields:
 *   - falsy value          -> an empty instance
 *   - cheerio object       -> that same object, untouched
 *   - node / array of nodes -> this instance, filled with those nodes
 *   - HTML string          -> this instance, filled with the parsed children
 *   - anything else        -> the result of `context.find(selector)`
 *
 * @param {*} selector - see the list above
 * @param {*} [context] - HTML string, selector string, node(s) or cheerio
 *                        object to search within; falls back to the root
 * @param {*} [root] - markup string (parsed here) or value accepted as a
 *                     selector, used as the default search context
 * @param {Object} [options] - overrides for `Cheerio.prototype.options`
 */

var Cheerio = module.exports = function(selector, context, root, options) {
  if (!(this instanceof Cheerio)) return new Cheerio(selector, context, root, options);

  // `_.defaults` writes into its first argument, so merge into a fresh object
  // rather than into the caller's `options`
  this.options = _.defaults({}, options, this.options);

  // $(), $(null), $(undefined), $(false)
  if (!selector) return this;

  if (root) {
    if (typeof root === 'string') root = parse(root, this.options);
    // Re-enters the constructor on this same object with `root` as the
    // selector; the value it returns becomes the default search context
    this._root = Cheerio.call(this, root);
  }

  // $($)
  if (selector.cheerio) return selector;

  // $(dom)
  if (isNode(selector)) {
    selector = [selector];
  }

  // $([dom])
  if (Array.isArray(selector)) {
    // Plain loop: does not rely on lodash's `thisArg` parameter
    for (var idx = 0; idx < selector.length; idx++) {
      this[idx] = selector[idx];
    }
    this.length = selector.length;
    return this;
  }

  // $(<html>)
  if (typeof selector === 'string' && isHtml(selector)) {
    return Cheerio.call(this, parse(selector, this.options).children);
  }

  // If we don't have a context, maybe we have a root, from loading
  if (!context) {
    context = this._root;
  } else if (typeof context === 'string') {
    if (isHtml(context)) {
      // $('li', '<ul>...</ul>')
      context = parse(context, this.options);
      context = Cheerio.call(this, context);
    } else {
      // $('li', 'ul')
      selector = [context, selector].join(' ');
      context = this._root;
    }
  // $('li', node), $('li', [nodes])
  } else if (!context.cheerio) {
    context = Cheerio.call(this, context);
  }

  // If we still don't have a context, return
  if (!context) return this;

  // #id, .class, tag
  return context.find(selector);
};

/**
 * Mix in `static`
 *
 * Copies everything exported by `./static` onto the constructor itself.
 */

_.extend(Cheerio, require('./static'));

/*
 * Set a signature of the object
 *
 * The constructor tests `selector.cheerio` / `context.cheerio` to recognise
 * values that are already cheerio objects.
 */

Cheerio.prototype.cheerio = '[cheerio object]';

/*
 * Cheerio default options
 *
 * Used by the constructor as fallbacks for any option the caller leaves out;
 * the merged result is what gets passed to `parse`.
 */

Cheerio.prototype.options = {
  normalizeWhitespace: false,
  xmlMode: false,
  decodeEntities: true
};

/*
 * Make cheerio an array-like object
 *
 * `length` defaults to 0 for instances the constructor leaves empty;
 * `splice` is borrowed directly from `Array.prototype`.
 */

Cheerio.prototype.length = 0;
Cheerio.prototype.splice = Array.prototype.splice;

/*
 * Check if string is HTML
 *
 * @param {String} str - candidate string
 * @return {Boolean} `true` when `str` is wrapped in `<`...`>` (and is at
 *                   least three characters long) or when `quickExpr` finds an
 *                   HTML fragment inside it
 */

var isHtml = function(str) {
  // Faster than running regex, if str starts with `<` and ends with `>`, assume it's HTML
  if (str.charAt(0) === '<' && str.charAt(str.length - 1) === '>' && str.length >= 3) return true;

  // Run the regex; only capture group 1 (the HTML fragment) counts, a bare
  // `#id` match leaves it undefined
  var match = quickExpr.exec(str);
  return !!(match && match[1]);
};

/*
 * Make a cheerio object
 *
 * Runs the constructor with this instance's `_root` and `options`, then
 * records this instance on the result as `prevObject`.
 *
 * @param {*} dom - passed to the constructor as `selector`
 * @param {*} [context] - passed to the constructor as `context`
 * @return {Object} whatever the constructor produced
 *
 * @api private
 */

Cheerio.prototype._make = function(dom, context) {
  var cheerio = new Cheerio(dom, context, this._root, this.options);
  cheerio.prevObject = this;
  return cheerio;
};

/**
 * Turn a cheerio object into an array
 *
 * Thin alias: returns the result of calling `this.get()` with no arguments.
 *
 * @deprecated
 */

Cheerio.prototype.toArray = function() {
  return this.get();
};

/**
 * Plug in the API
 *
 * Copies every member of each module listed in `api` onto
 * `Cheerio.prototype`, in list order.
 */
api.forEach(function(mod) {
  _.extend(Cheerio.prototype, mod);
});

/*
 * Check if a value looks like a DOM node
 *
 * A value counts as a node when it has a truthy `name`, or when its `type`
 * is `'text'` or `'comment'`.
 *
 * @param {*} obj - value to inspect
 * @return {Boolean} always a strict boolean; falsy input yields `false`
 */
var isNode = function(obj) {
  // Falsy values (null, undefined, '', 0, false) are never nodes; checking
  // first avoids a TypeError on property access for null/undefined
  if (!obj) return false;

  return !!(obj.name || obj.type === 'text' || obj.type === 'comment');
};