Skip to content

Commit 7c689be

Browse files
committed
First release
0 parents  commit 7c689be

21 files changed

+2410
-0
lines changed

History.md

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
0.1 / 2011-07-18
2+
==================
3+
4+
* Initial commit

LICENSE

+27
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
Copyright (c) 2011, Francois-Guillaume Ribreau <[email protected]>
2+
All rights reserved.
3+
4+
Redistribution and use in source and binary forms, with or without
5+
modification, are permitted provided that the following conditions are met:
6+
7+
* Redistributions of source code must retain the above copyright notice,
8+
this list of conditions and the following disclaimer.
9+
10+
* Redistributions in binary form must reproduce the above copyright notice,
11+
this list of conditions and the following disclaimer in the documentation
12+
and/or other materials provided with the distribution.
13+
14+
* Neither the name of node-webworkers nor the names of its contributors
15+
may be used to endorse or promote products derived from this software
16+
without specific prior written permission.
17+
18+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

Makefile

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
test:
2+
nodeunit ./test/*.js
3+
4+
.PHONY: test

README.md

+56
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
`LanguageDetect` is a port of the [PEAR::Text_LanguageDetect](http://pear.php.net/package/Text_LanguageDetect) for [node.js](http://nodejs.org).
2+
3+
LanguageDetect can identify 52 human languages from text samples and return confidence scores for each.
4+
5+
### Installation
6+
7+
This package can be installed via [npm](http://npmjs.org/) as follows
8+
9+
% npm install languagedetect -g
10+
11+
### Example
12+
13+
var LanguageDetect = require('languagedetect');
14+
var lngDetector = new LanguageDetect();
15+
16+
// OR
17+
// var lngDetector = new (require('languagedetect'));
18+
19+
console.log(lngDetector.detect('This is a test.'));
20+
21+
/*
22+
Will print:
23+
[ [ 'english', 0.5969230769230769 ],
24+
[ 'hungarian', 0.407948717948718 ],
25+
[ 'latin', 0.39205128205128204 ],
26+
[ 'french', 0.367948717948718 ],
27+
[ 'portuguese', 0.3669230769230769 ],
28+
[ 'estonian', 0.3507692307692307 ],
29+
[ 'latvian', 0.2615384615384615 ],
30+
[ 'spanish', 0.2597435897435898 ],
31+
[ 'slovak', 0.25051282051282053 ],
32+
[ 'dutch', 0.2482051282051282 ],
33+
[ 'lithuanian', 0.2466666666666667 ],
34+
... ]
35+
*/
36+
37+
// Only get first 2 results
38+
console.log(lngDetector.detect('This is a test.', 2));
39+
40+
/*
41+
[ [ 'english', 0.5969230769230769 ], [ 'hungarian', 0.407948717948718 ] ]
42+
*/
43+
44+
### API
45+
46+
* `detect(sample, limit)` Detects the closeness of a sample of text to the known languages
47+
* `getLanguages()` Returns the list of detectable languages
48+
* `getLanguageCount()` Returns the number of languages that the lib can detect
49+
50+
### Benchmark
51+
52+
Currently LanguageDetect handles 1000 items in 2 seconds (vs 7.5 seconds with PEAR::Text_LanguageDetect)
53+
54+
### Credits
55+
56+
Nicholas Pisarro for his work on [PEAR::Text_LanguageDetect](http://pear.php.net/package/Text_LanguageDetect)

benchmark/benchmark.coffee

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
os = require 'os'
2+
data = require './data/text'
3+
LanguageDetect = require '../index'
4+
5+
l = new LanguageDetect()
6+
7+
t = os.uptime();
8+
ok = 0
9+
10+
for text in data
11+
r = l.detect(text.text)
12+
ok++ if r[0][1] > 0.2
13+
14+
console.log "#{data.length} items processed in #{os.uptime()-t} secs (#{ok} with a score > 0.2)"

benchmark/benchmark.js

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
(function() {
2+
var LanguageDetect, data, l, ok, os, r, t, text, _i, _len;
3+
os = require('os');
4+
data = require('./data/text');
5+
LanguageDetect = require('../index');
6+
l = new LanguageDetect();
7+
t = os.uptime();
8+
ok = 0;
9+
for (_i = 0, _len = data.length; _i < _len; _i++) {
10+
text = data[_i];
11+
r = l.detect(text.text);
12+
if (r[0][1] > 0.2) {
13+
ok++;
14+
}
15+
}
16+
console.log("" + data.length + " items processed in " + (os.uptime() - t) + " secs (" + ok + " with a score > 0.2)");
17+
}).call(this);

benchmark/benchmark.php

+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
<?php
2+
header('Content-type:text/html; charset=utf-8');
3+
4+
require_once 'Text/LanguageDetect.php';
5+
$l = new Text_LanguageDetect();
6+
7+
$f = file_get_contents(dirname(__FILE__).'/data/text.js');
8+
9+
// Cleaning the JS stuff
10+
$f = str_replace('module.exports = ', '', substr($f, 0, strlen($f)-2));
11+
12+
$a = json_decode($f);
13+
14+
$t = time();
15+
$ok = 0;
16+
for ($i=0, $iM = count($a); $i < $iM; $i++) {
17+
18+
$r = $l->detect($a[$i]->text);
19+
20+
$k = array_keys($r);
21+
if($r[$k[0]] > 0.2){
22+
$ok++;
23+
}
24+
}
25+
echo time()-$t.'secs '.$iM.' items en PHP ('.$ok.' ont un bon score)';

benchmark/data/text.js

+1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

data/lang.dat

+1
Large diffs are not rendered by default.

data/lang.js

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
module.exports = (function(){
2+
// TODO: Convert the file to json
3+
return require('../lib/phpjs').unserialize(
4+
require('fs').readFileSync(__dirname+'/lang.dat', 'utf-8')
5+
);
6+
})()

data/unicode_blocks.dat

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
a:145:{i:0;a:3:{i:0;s:6:"0x0000";i:1;s:6:"0x007F";i:2;s:11:"Basic Latin";}i:1;a:3:{i:0;s:6:"0x0080";i:1;s:6:"0x00FF";i:2;s:18:"Latin-1 Supplement";}i:2;a:3:{i:0;s:6:"0x0100";i:1;s:6:"0x017F";i:2;s:16:"Latin Extended-A";}i:3;a:3:{i:0;s:6:"0x0180";i:1;s:6:"0x024F";i:2;s:16:"Latin Extended-B";}i:4;a:3:{i:0;s:6:"0x0250";i:1;s:6:"0x02AF";i:2;s:14:"IPA Extensions";}i:5;a:3:{i:0;s:6:"0x02B0";i:1;s:6:"0x02FF";i:2;s:24:"Spacing Modifier Letters";}i:6;a:3:{i:0;s:6:"0x0300";i:1;s:6:"0x036F";i:2;s:27:"Combining Diacritical Marks";}i:7;a:3:{i:0;s:6:"0x0370";i:1;s:6:"0x03FF";i:2;s:16:"Greek and Coptic";}i:8;a:3:{i:0;s:6:"0x0400";i:1;s:6:"0x04FF";i:2;s:8:"Cyrillic";}i:9;a:3:{i:0;s:6:"0x0500";i:1;s:6:"0x052F";i:2;s:19:"Cyrillic Supplement";}i:10;a:3:{i:0;s:6:"0x0530";i:1;s:6:"0x058F";i:2;s:8:"Armenian";}i:11;a:3:{i:0;s:6:"0x0590";i:1;s:6:"0x05FF";i:2;s:6:"Hebrew";}i:12;a:3:{i:0;s:6:"0x0600";i:1;s:6:"0x06FF";i:2;s:6:"Arabic";}i:13;a:3:{i:0;s:6:"0x0700";i:1;s:6:"0x074F";i:2;s:6:"Syriac";}i:14;a:3:{i:0;s:6:"0x0750";i:1;s:6:"0x077F";i:2;s:17:"Arabic Supplement";}i:15;a:3:{i:0;s:6:"0x0780";i:1;s:6:"0x07BF";i:2;s:6:"Thaana";}i:16;a:3:{i:0;s:6:"0x0900";i:1;s:6:"0x097F";i:2;s:10:"Devanagari";}i:17;a:3:{i:0;s:6:"0x0980";i:1;s:6:"0x09FF";i:2;s:7:"Bengali";}i:18;a:3:{i:0;s:6:"0x0A00";i:1;s:6:"0x0A7F";i:2;s:8:"Gurmukhi";}i:19;a:3:{i:0;s:6:"0x0A80";i:1;s:6:"0x0AFF";i:2;s:8:"Gujarati";}i:20;a:3:{i:0;s:6:"0x0B00";i:1;s:6:"0x0B7F";i:2;s:5:"Oriya";}i:21;a:3:{i:0;s:6:"0x0B80";i:1;s:6:"0x0BFF";i:2;s:5:"Tamil";}i:22;a:3:{i:0;s:6:"0x0C00";i:1;s:6:"0x0C7F";i:2;s:6:"Telugu";}i:23;a:3:{i:0;s:6:"0x0C80";i:1;s:6:"0x0CFF";i:2;s:7:"Kannada";}i:24;a:3:{i:0;s:6:"0x0D00";i:1;s:6:"0x0D7F";i:2;s:9:"Malayalam";}i:25;a:3:{i:0;s:6:"0x0D80";i:1;s:6:"0x0DFF";i:2;s:7:"Sinhala";}i:26;a:3:{i:0;s:6:"0x0E00";i:1;s:6:"0x0E7F";i:2;s:4:"Thai";}i:27;a:3:{i:0;s:6:"0x0E80";i:1;s:6:"0x0EFF";i:2;s:3:"Lao";}i:28;a:3:{i:0;s:6:"0x0F00";i:1;s:6:"0x0FFF";i:2;s:7:"Tibetan";}i:29;a:3:{i:0;s:6:"0x1000";i:1;s:6:"0x109F";i:2;s:7:"Myanmar";}i:30;a:3:{i:0;s:6:"0x10A0";i:1;s:6:"0x10FF";i:2;s:8:"Georgian";}i:31;a:3:{i:0;s:6:"0x1100";i:1;s:6:"0x11FF";i:2;s:11:"Hangul Jamo";}i:32;a:3:{i:0;s:6:"0x1200";i:1;s:6:"0x137F";i:2;s:8:"Ethiopic";}i:33;a:3:{i:0;s:6:"0x1380";i:1;s:6:"0x139F";i:2;s:19:"Ethiopic Supplement";}i:34;a:3:{i:0;s:6:"0x13A0";i:1;s:6:"0x13FF";i:2;s:8:"Cherokee";}i:35;a:3:{i:0;s:6:"0x1400";i:1;s:6:"0x167F";i:2;s:37:"Unified Canadian Aboriginal Syllabics";}i:36;a:3:{i:0;s:6:"0x1680";i:1;s:6:"0x169F";i:2;s:5:"Ogham";}i:37;a:3:{i:0;s:6:"0x16A0";i:1;s:6:"0x16FF";i:2;s:5:"Runic";}i:38;a:3:{i:0;s:6:"0x1700";i:1;s:6:"0x171F";i:2;s:7:"Tagalog";}i:39;a:3:{i:0;s:6:"0x1720";i:1;s:6:"0x173F";i:2;s:7:"Hanunoo";}i:40;a:3:{i:0;s:6:"0x1740";i:1;s:6:"0x175F";i:2;s:5:"Buhid";}i:41;a:3:{i:0;s:6:"0x1760";i:1;s:6:"0x177F";i:2;s:8:"Tagbanwa";}i:42;a:3:{i:0;s:6:"0x1780";i:1;s:6:"0x17FF";i:2;s:5:"Khmer";}i:43;a:3:{i:0;s:6:"0x1800";i:1;s:6:"0x18AF";i:2;s:9:"Mongolian";}i:44;a:3:{i:0;s:6:"0x1900";i:1;s:6:"0x194F";i:2;s:5:"Limbu";}i:45;a:3:{i:0;s:6:"0x1950";i:1;s:6:"0x197F";i:2;s:6:"Tai Le";}i:46;a:3:{i:0;s:6:"0x1980";i:1;s:6:"0x19DF";i:2;s:11:"New Tai Lue";}i:47;a:3:{i:0;s:6:"0x19E0";i:1;s:6:"0x19FF";i:2;s:13:"Khmer Symbols";}i:48;a:3:{i:0;s:6:"0x1A00";i:1;s:6:"0x1A1F";i:2;s:8:"Buginese";}i:49;a:3:{i:0;s:6:"0x1D00";i:1;s:6:"0x1D7F";i:2;s:19:"Phonetic Extensions";}i:50;a:3:{i:0;s:6:"0x1D80";i:1;s:6:"0x1DBF";i:2;s:30:"Phonetic Extensions Supplement";}i:51;a:3:{i:0;s:6:"0x1DC0";i:1;s:6:"0x1DFF";i:2;s:38:"Combining Diacritical Marks Supplement";}i:52;a:3:{i:0;s:6:"0x1E00";i:1;s:6:"0x1EFF";i:2;s:25:"Latin Extended Additional";}i:53;a:3:{i:0;s:6:"0x1F00";i:1;s:6:"0x1FFF";i:2;s:14:"Greek Extended";}i:54;a:3:{i:0;s:6:"0x2000";i:1;s:6:"0x206F";i:2;s:19:"General Punctuation";}i:55;a:3:{i:0;s:6:"0x2070";i:1;s:6:"0x209F";i:2;s:27:"Superscripts and Subscripts";}i:56;a:3:{i:0;s:6:"0x20A0";i:1;s:6:"0x20CF";i:2;s:16:"Currency Symbols";}i:57;a:3:{i:0;s:6:"0x20D0";i:1;s:6:"0x20FF";i:2;s:39:"Combining Diacritical Marks for Symbols";}i:58;a:3:{i:0;s:6:"0x2100";i:1;s:6:"0x214F";i:2;s:18:"Letterlike Symbols";}i:59;a:3:{i:0;s:6:"0x2150";i:1;s:6:"0x218F";i:2;s:12:"Number Forms";}i:60;a:3:{i:0;s:6:"0x2190";i:1;s:6:"0x21FF";i:2;s:6:"Arrows";}i:61;a:3:{i:0;s:6:"0x2200";i:1;s:6:"0x22FF";i:2;s:22:"Mathematical Operators";}i:62;a:3:{i:0;s:6:"0x2300";i:1;s:6:"0x23FF";i:2;s:23:"Miscellaneous Technical";}i:63;a:3:{i:0;s:6:"0x2400";i:1;s:6:"0x243F";i:2;s:16:"Control Pictures";}i:64;a:3:{i:0;s:6:"0x2440";i:1;s:6:"0x245F";i:2;s:29:"Optical Character Recognition";}i:65;a:3:{i:0;s:6:"0x2460";i:1;s:6:"0x24FF";i:2;s:22:"Enclosed Alphanumerics";}i:66;a:3:{i:0;s:6:"0x2500";i:1;s:6:"0x257F";i:2;s:11:"Box Drawing";}i:67;a:3:{i:0;s:6:"0x2580";i:1;s:6:"0x259F";i:2;s:14:"Block Elements";}i:68;a:3:{i:0;s:6:"0x25A0";i:1;s:6:"0x25FF";i:2;s:16:"Geometric Shapes";}i:69;a:3:{i:0;s:6:"0x2600";i:1;s:6:"0x26FF";i:2;s:21:"Miscellaneous Symbols";}i:70;a:3:{i:0;s:6:"0x2700";i:1;s:6:"0x27BF";i:2;s:8:"Dingbats";}i:71;a:3:{i:0;s:6:"0x27C0";i:1;s:6:"0x27EF";i:2;s:36:"Miscellaneous Mathematical Symbols-A";}i:72;a:3:{i:0;s:6:"0x27F0";i:1;s:6:"0x27FF";i:2;s:21:"Supplemental Arrows-A";}i:73;a:3:{i:0;s:6:"0x2800";i:1;s:6:"0x28FF";i:2;s:16:"Braille Patterns";}i:74;a:3:{i:0;s:6:"0x2900";i:1;s:6:"0x297F";i:2;s:21:"Supplemental Arrows-B";}i:75;a:3:{i:0;s:6:"0x2980";i:1;s:6:"0x29FF";i:2;s:36:"Miscellaneous Mathematical Symbols-B";}i:76;a:3:{i:0;s:6:"0x2A00";i:1;s:6:"0x2AFF";i:2;s:35:"Supplemental Mathematical Operators";}i:77;a:3:{i:0;s:6:"0x2B00";i:1;s:6:"0x2BFF";i:2;s:32:"Miscellaneous Symbols and Arrows";}i:78;a:3:{i:0;s:6:"0x2C00";i:1;s:6:"0x2C5F";i:2;s:10:"Glagolitic";}i:79;a:3:{i:0;s:6:"0x2C80";i:1;s:6:"0x2CFF";i:2;s:6:"Coptic";}i:80;a:3:{i:0;s:6:"0x2D00";i:1;s:6:"0x2D2F";i:2;s:19:"Georgian Supplement";}i:81;a:3:{i:0;s:6:"0x2D30";i:1;s:6:"0x2D7F";i:2;s:8:"Tifinagh";}i:82;a:3:{i:0;s:6:"0x2D80";i:1;s:6:"0x2DDF";i:2;s:17:"Ethiopic Extended";}i:83;a:3:{i:0;s:6:"0x2E00";i:1;s:6:"0x2E7F";i:2;s:24:"Supplemental Punctuation";}i:84;a:3:{i:0;s:6:"0x2E80";i:1;s:6:"0x2EFF";i:2;s:23:"CJK Radicals Supplement";}i:85;a:3:{i:0;s:6:"0x2F00";i:1;s:6:"0x2FDF";i:2;s:15:"Kangxi Radicals";}i:86;a:3:{i:0;s:6:"0x2FF0";i:1;s:6:"0x2FFF";i:2;s:34:"Ideographic Description Characters";}i:87;a:3:{i:0;s:6:"0x3000";i:1;s:6:"0x303F";i:2;s:27:"CJK Symbols and Punctuation";}i:88;a:3:{i:0;s:6:"0x3040";i:1;s:6:"0x309F";i:2;s:8:"Hiragana";}i:89;a:3:{i:0;s:6:"0x30A0";i:1;s:6:"0x30FF";i:2;s:8:"Katakana";}i:90;a:3:{i:0;s:6:"0x3100";i:1;s:6:"0x312F";i:2;s:8:"Bopomofo";}i:91;a:3:{i:0;s:6:"0x3130";i:1;s:6:"0x318F";i:2;s:25:"Hangul Compatibility Jamo";}i:92;a:3:{i:0;s:6:"0x3190";i:1;s:6:"0x319F";i:2;s:6:"Kanbun";}i:93;a:3:{i:0;s:6:"0x31A0";i:1;s:6:"0x31BF";i:2;s:17:"Bopomofo Extended";}i:94;a:3:{i:0;s:6:"0x31C0";i:1;s:6:"0x31EF";i:2;s:11:"CJK Strokes";}i:95;a:3:{i:0;s:6:"0x31F0";i:1;s:6:"0x31FF";i:2;s:28:"Katakana Phonetic Extensions";}i:96;a:3:{i:0;s:6:"0x3200";i:1;s:6:"0x32FF";i:2;s:31:"Enclosed CJK Letters and Months";}i:97;a:3:{i:0;s:6:"0x3300";i:1;s:6:"0x33FF";i:2;s:17:"CJK Compatibility";}i:98;a:3:{i:0;s:6:"0x3400";i:1;s:6:"0x4DBF";i:2;s:34:"CJK Unified Ideographs Extension A";}i:99;a:3:{i:0;s:6:"0x4DC0";i:1;s:6:"0x4DFF";i:2;s:23:"Yijing Hexagram Symbols";}i:100;a:3:{i:0;s:6:"0x4E00";i:1;s:6:"0x9FFF";i:2;s:22:"CJK Unified Ideographs";}i:101;a:3:{i:0;s:6:"0xA000";i:1;s:6:"0xA48F";i:2;s:12:"Yi Syllables";}i:102;a:3:{i:0;s:6:"0xA490";i:1;s:6:"0xA4CF";i:2;s:11:"Yi Radicals";}i:103;a:3:{i:0;s:6:"0xA700";i:1;s:6:"0xA71F";i:2;s:21:"Modifier Tone Letters";}i:104;a:3:{i:0;s:6:"0xA800";i:1;s:6:"0xA82F";i:2;s:12:"Syloti Nagri";}i:105;a:3:{i:0;s:6:"0xAC00";i:1;s:6:"0xD7AF";i:2;s:16:"Hangul Syllables";}i:106;a:3:{i:0;s:6:"0xD800";i:1;s:6:"0xDB7F";i:2;s:15:"High Surrogates";}i:107;a:3:{i:0;s:6:"0xDB80";i:1;s:6:"0xDBFF";i:2;s:27:"High Private Use Surrogates";}i:108;a:3:{i:0;s:6:"0xDC00";i:1;s:6:"0xDFFF";i:2;s:14:"Low Surrogates";}i:109;a:3:{i:0;s:6:"0xE000";i:1;s:6:"0xF8FF";i:2;s:16:"Private Use Area";}i:110;a:3:{i:0;s:6:"0xF900";i:1;s:6:"0xFAFF";i:2;s:28:"CJK Compatibility Ideographs";}i:111;a:3:{i:0;s:6:"0xFB00";i:1;s:6:"0xFB4F";i:2;s:29:"Alphabetic Presentation Forms";}i:112;a:3:{i:0;s:6:"0xFB50";i:1;s:6:"0xFDFF";i:2;s:27:"Arabic Presentation Forms-A";}i:113;a:3:{i:0;s:6:"0xFE00";i:1;s:6:"0xFE0F";i:2;s:19:"Variation Selectors";}i:114;a:3:{i:0;s:6:"0xFE10";i:1;s:6:"0xFE1F";i:2;s:14:"Vertical Forms";}i:115;a:3:{i:0;s:6:"0xFE20";i:1;s:6:"0xFE2F";i:2;s:20:"Combining Half Marks";}i:116;a:3:{i:0;s:6:"0xFE30";i:1;s:6:"0xFE4F";i:2;s:23:"CJK Compatibility Forms";}i:117;a:3:{i:0;s:6:"0xFE50";i:1;s:6:"0xFE6F";i:2;s:19:"Small Form Variants";}i:118;a:3:{i:0;s:6:"0xFE70";i:1;s:6:"0xFEFF";i:2;s:27:"Arabic Presentation Forms-B";}i:119;a:3:{i:0;s:6:"0xFF00";i:1;s:6:"0xFFEF";i:2;s:29:"Halfwidth and Fullwidth Forms";}i:120;a:3:{i:0;s:6:"0xFFF0";i:1;s:6:"0xFFFF";i:2;s:8:"Specials";}i:121;a:3:{i:0;s:7:"0x10000";i:1;s:7:"0x1007F";i:2;s:18:"Linear B Syllabary";}i:122;a:3:{i:0;s:7:"0x10080";i:1;s:7:"0x100FF";i:2;s:18:"Linear B Ideograms";}i:123;a:3:{i:0;s:7:"0x10100";i:1;s:7:"0x1013F";i:2;s:14:"Aegean Numbers";}i:124;a:3:{i:0;s:7:"0x10140";i:1;s:7:"0x1018F";i:2;s:21:"Ancient Greek Numbers";}i:125;a:3:{i:0;s:7:"0x10300";i:1;s:7:"0x1032F";i:2;s:10:"Old Italic";}i:126;a:3:{i:0;s:7:"0x10330";i:1;s:7:"0x1034F";i:2;s:6:"Gothic";}i:127;a:3:{i:0;s:7:"0x10380";i:1;s:7:"0x1039F";i:2;s:8:"Ugaritic";}i:128;a:3:{i:0;s:7:"0x103A0";i:1;s:7:"0x103DF";i:2;s:11:"Old Persian";}i:129;a:3:{i:0;s:7:"0x10400";i:1;s:7:"0x1044F";i:2;s:7:"Deseret";}i:130;a:3:{i:0;s:7:"0x10450";i:1;s:7:"0x1047F";i:2;s:7:"Shavian";}i:131;a:3:{i:0;s:7:"0x10480";i:1;s:7:"0x104AF";i:2;s:7:"Osmanya";}i:132;a:3:{i:0;s:7:"0x10800";i:1;s:7:"0x1083F";i:2;s:17:"Cypriot Syllabary";}i:133;a:3:{i:0;s:7:"0x10A00";i:1;s:7:"0x10A5F";i:2;s:10:"Kharoshthi";}i:134;a:3:{i:0;s:7:"0x1D000";i:1;s:7:"0x1D0FF";i:2;s:25:"Byzantine Musical Symbols";}i:135;a:3:{i:0;s:7:"0x1D100";i:1;s:7:"0x1D1FF";i:2;s:15:"Musical Symbols";}i:136;a:3:{i:0;s:7:"0x1D200";i:1;s:7:"0x1D24F";i:2;s:30:"Ancient Greek Musical Notation";}i:137;a:3:{i:0;s:7:"0x1D300";i:1;s:7:"0x1D35F";i:2;s:21:"Tai Xuan Jing Symbols";}i:138;a:3:{i:0;s:7:"0x1D400";i:1;s:7:"0x1D7FF";i:2;s:33:"Mathematical Alphanumeric Symbols";}i:139;a:3:{i:0;s:7:"0x20000";i:1;s:7:"0x2A6DF";i:2;s:34:"CJK Unified Ideographs Extension B";}i:140;a:3:{i:0;s:7:"0x2F800";i:1;s:7:"0x2FA1F";i:2;s:39:"CJK Compatibility Ideographs Supplement";}i:141;a:3:{i:0;s:7:"0xE0000";i:1;s:7:"0xE007F";i:2;s:4:"Tags";}i:142;a:3:{i:0;s:7:"0xE0100";i:1;s:7:"0xE01EF";i:2;s:30:"Variation Selectors Supplement";}i:143;a:3:{i:0;s:7:"0xF0000";i:1;s:7:"0xFFFFF";i:2;s:32:"Supplementary Private Use Area-A";}i:144;a:3:{i:0;s:8:"0x100000";i:1;s:8:"0x10FFFF";i:2;s:32:"Supplementary Private Use Area-B";}}

data/unicode_blocks.js

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
module.exports = (function(){
2+
// TODO: Convert the file to json
3+
var r = require('../lib/phpjs').unserialize(
4+
require('fs').readFileSync(__dirname+'/unicode_blocks.dat', 'utf-8')
5+
);
6+
7+
// Bugfix: unserialize returns an Object instead of an Array
8+
var a = [];
9+
for (var i = 0, iM = Object.keys(r).length; i < iM; i++) {
10+
a[i] = r[i];
11+
};
12+
return a;
13+
})()

index.js

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
module.exports = require('./lib/LanguageDetect');

0 commit comments

Comments
 (0)