Skip to content

Commit 2d25eba

Browse files
committed
Fix for newer pymongo
1 parent 8376220 commit 2d25eba

File tree

5 files changed

+65
-61
lines changed

5 files changed

+65
-61
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
.env
22
.idea
33
__pycache__/
4+
var/
45
.venv

api/readabilityio.py

+3 -5
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import html2text
66
import requests
77

8-
from pymongo import MongoClient
8+
from pymongo import MongoClient, ASCENDING
99
from readability.readability import Document
1010

1111
from settings import MONGO_HOST, MONGO_PORT
@@ -24,12 +24,10 @@ def initialize(self):
2424
self.conn = MongoClient(MONGO_HOST, MONGO_PORT)
2525
self.db = self.conn[READ_DB]
2626
self.log = self.db[LOG_COLL]
27-
self.log.ensure_index("reqtime", 1)
27+
self.log.create_index([("reqtime", ASCENDING)])
2828

2929
def __log(self, logrec):
30-
self.db = self.conn[READ_DB]
31-
self.log = self.db[LOG_COLL]
32-
self.log.save(logrec)
30+
self.log.insert_one(logrec)
3331

3432
def get(self):
3533
rtime = time.time()

settings.py → api/settings.py

File renamed without changes.

api/tornading.py

+2 -5
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,3 @@
1-
#!/usr/bin/env python
2-
# -*- coding: utf-8 -*-
3-
41
import os
52
import sys
63
import signal
@@ -54,8 +51,8 @@ def daemon():
5451
import tornado.ioloop
5552
import tornado.wsgi
5653

57-
#~ # настраиваем Джанго
58-
from api.readabilityio import application
54+
#~ # настраиваем Торнадо
55+
from readabilityio import application
5956
#
6057
# container = tornado.wsgi.WSGIContainer(application)
6158
http_server = tornado.httpserver.HTTPServer(application)

textmetric/metric.py

+59 -51
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,16 @@
11
#!/usr/bin/env python
2-
# -*- coding: utf-8 -*-
2+
33
__author__ = 'ibegtin'
4-
from math import sqrt
54
import csv
6-
5+
from math import sqrt
6+
import os
77

88
from numpy import mean, arange
99

10+
from settings import BASE_DIR, TEXTSBYGRADE_FOLDER
11+
12+
TEXTSBYGRADE_PATH = os.path.join(BASE_DIR, TEXTSBYGRADE_FOLDER)
13+
1014

1115
# Russian sounds and characters
1216
RU_CONSONANTS_LOW = [u'к', u'п', u'с', u'т', u'ф', u'х', u'ц', u'ч', u'ш', u'щ']
@@ -342,55 +346,59 @@ def calc_readability_metrics(text, verbose=True):
342346

343347
def print_metrics(filename, verbose=True):
344348
"""Расчет метрик"""
345-
metrics = calc_text_metrics(filename, verbose)
349+
metrics = calc_text_metrics(filename, verbose)["metrics"]
346350

347-
print u"""
348-
Файл - %s
349-
""" % (filename, )
351+
print(f"(Файл - {filename})")
350352
if verbose:
351-
print u"""
352-
- Символов: %d
353-
- Букв: %d
354-
- Пробелов: %d
355-
- Слов: %d
356-
- Сложных слов: %d
357-
- Слогов: %d
358-
- Предложений: %d
359-
- Доля сложных слов: %f
360-
- Среднее число слогов на слово: %f
361-
- Среднее число слов на предложение: %f
362-
""" %(metrics['chars'], metrics['letters'], metrics['spaces'], metrics['n_words'], metrics['n_complex_words'], metrics['n_syllabes'], metrics['n_sentences'], metrics['c_share'], metrics['avg_syl'], metrics['avg_slen'])#, unfam_words, unf_share)
363-
print '- SMOG: %f' %(calc_SMOG(metrics['n_complex_words'], metrics['n_sentences']))
364-
print '- Gunning fog: %f' %(calc_Gunning_fog(metrics['n_complex_words'], metrics['n_words'], metrics['n_sentences']))
365-
print '- Dale-Chale: %f' %(calc_Dale_Chale_index(metrics['n_complex_words'], metrics['n_words'], metrics['n_sentences']))
366-
print '- Flesh Kincaid: %f' %(calc_Flesh_Kincaid(metrics['n_syllabes'], metrics['n_words'], metrics['n_sentences']))
367-
# print '- Flesh Kincaid (rus): %f' %(calc_Flesh_Kincaid_rus(metrics['n_syllabes'], metrics['n_words'], metrics['n_sentences']))
353+
print(
354+
f"- Символов: {metrics['chars']}\n"
355+
f"- Букв: {metrics['letters']}\n"
356+
f"- Пробелов: {metrics['spaces']}\n"
357+
f"- Слов: {metrics['n_words']}\n"
358+
f"- Сложных слов: {metrics['n_complex_words']}\n"
359+
f"- Слогов: {metrics['n_syllabes']}\n"
360+
f"- Предложений: {metrics['n_sentences']}\n"
361+
f"- Доля сложных слов: {metrics['c_share']}\n"
362+
f"- Слов: {metrics['n_words']}\n"
363+
f"- Среднее число слогов на слово: {metrics['avg_syl']}\n"
364+
f"- Среднее число слов на предложение: {metrics['avg_slen']}\n"
365+
)
366+
print('- SMOG: %f' % (calc_SMOG(metrics['n_complex_words'], metrics['n_sentences'])))
367+
print('- Gunning fog: %f' % (
368+
calc_Gunning_fog(metrics['n_complex_words'], metrics['n_words'], metrics['n_sentences'])))
369+
print(
370+
'- Dale-Chale: %f' % (calc_Dale_Chale(metrics['n_complex_words'], metrics['n_words'], metrics['n_sentences'])))
371+
print(
372+
'- Flesh Kincaid: %f' % (calc_Flesh_Kincaid(metrics['n_syllabes'], metrics['n_words'], metrics['n_sentences'])))
373+
# print('- Flesh Kincaid (rus): %f' %(calc_Flesh_Kincaid_rus(metrics['n_syllabes'], metrics['n_words'], metrics['n_sentences'])))
368374
grade = calc_Flesh_Kincaid_Grade_rus(metrics['n_syllabes'], metrics['n_words'], metrics['n_sentences'])
369375
abs_grade = round(grade)
370-
print '- Flesh Kincaid Grade (rus): %f' %(grade)
376+
print('- Flesh Kincaid Grade (rus): %f' % (grade))
371377
if abs_grade in GRADE_TEXT:
372-
text = GRADE_TEXT[abs_grade]
378+
text = GRADE_TEXT[abs_grade]
373379
elif abs_grade > 17:
374380
text = POST_GRADE_TEXT_18_24
375381
else:
376382
text = u'неизвестно (%d)' % (grade)
377-
print '- Grade level: %s' % text
378-
383+
print('- Grade level: %s' % text)
379384

380385

381386
def generate_all_metrics(outfile="metrics.csv"):
382387
f = open(outfile, 'w')
383-
fieldnames = ['filename', 'name', 'grade', 'index_fk_rus', 'fk_grade_diff', 'index_cl_rus', 'cl_grade_diff', 'index_dc_rus', 'dc_grade_diff', 'index_SMOG_rus', 'SMOG_grade_diff', 'index_ari_rus', 'ari_grade_diff', 'chars', 'spaces', 'letters', 'n_syllabes', 'n_words', 'n_complex_words', 'n_simple_words', 'n_sentences', 'c_share', 'avg_syl', 'avg_slen', 'wsyllabes']
388+
fieldnames = ['filename', 'name', 'grade', 'index_fk_rus', 'fk_grade_diff', 'index_cl_rus', 'cl_grade_diff',
389+
'index_dc_rus', 'dc_grade_diff', 'index_SMOG_rus', 'SMOG_grade_diff', 'index_ari_rus',
390+
'ari_grade_diff', 'chars', 'spaces', 'letters', 'n_syllabes', 'n_words', 'n_complex_words',
391+
'n_simple_words', 'n_sentences', 'c_share', 'avg_syl', 'avg_slen', 'wsyllabes']
384392
writer = csv.DictWriter(f, fieldnames)
385393
writer.writeheader()
386394
diffs = []
387-
avg_diff = 0
388395
for text in TEXT_LIST:
389-
metrics = calc_text_metrics('textsbygrade/%d/%s' %(text[1], text[0]))
390-
print text[0]
396+
metrics = calc_text_metrics(os.path.join(TEXTSBYGRADE_PATH, f"{text[1]}", f"{text[0]}"))
397+
print(text[0])
391398
for k, v in metrics['wsyllabes'].items():
392-
print "- %s: %d of %d (%f)" %(k, v, metrics['n_words'], float(v) * 100.0 / metrics['n_words'])
393-
print '- simple words: %d (%f%%)' % (metrics['n_simple_words'], float(metrics['n_simple_words']) * 100.0 / metrics['n_words'])
399+
print("- %s: %d of %d (%f)" % (k, v, metrics['n_words'], float(v) * 100.0 / metrics['n_words']))
400+
print('- simple words: %d (%f%%)' % (
401+
metrics['n_simple_words'], float(metrics['n_simple_words']) * 100.0 / metrics['n_words']))
394402

395403
metrics['name'] = text[3]
396404
metrics['filename'] = text[0]
@@ -425,30 +433,29 @@ def generate_all_metrics(outfile="metrics.csv"):
425433
grade_diff = metrics['grade'] - metrics['index_ari_rus']
426434
metrics['ari_grade_diff'] = grade_diff
427435

428-
429436
diffs.append(grade_diff)
430437
for k in metrics.keys():
431438
metrics[k] = metrics[k].encode('utf8') if type(metrics[k]) == type(u'') else str(metrics[k])
432439
writer.writerow(metrics)
433440
avg_diff = mean(diffs)
434441
diffs.sort()
435-
print diffs
436-
print avg_diff
442+
print(diffs)
443+
print(avg_diff)
437444
f.close()
438445

446+
439447
def print_all_metrics():
440448
for text in TEXT_LIST:
441-
print "#", text[3].encode('utf8')
442-
print_metrics('textsbygrade/%d/%s' %(text[1], text[0]))
443-
print "----"
444-
449+
print("#", text[3].encode('utf8'))
450+
print_metrics(os.path.join(TEXTSBYGRADE_PATH, f"{text[1]}", f"{text[0]}"))
451+
print("----")
445452

446453

447454
def load_metrics():
448455
allmetrics = []
449456
for text in TEXT_LIST:
450457
# if text[1] > 16: continue
451-
metrics = calc_text_metrics('textsbygrade/%d/%s' %(text[1], text[0]))
458+
metrics = calc_text_metrics(os.path.join(TEXTSBYGRADE_PATH, f"{text[1]}", f"{text[0]}"))
452459
metrics['name'] = text[3]
453460
metrics['filename'] = text[0]
454461
metrics['grade'] = text[1]
@@ -499,13 +506,13 @@ def adapt_algorithm_2r(func, keys=[], ranges=[], expected_max=3.0, expected_mean
499506
total = 1
500507
for r in ranges:
501508
total *= (r[1] - r[0]) / r[2]
502-
print r
509+
print(r)
503510

504511
for r1 in arange(*ranges[0]):
505512
for r2 in arange(*ranges[1]):
506513
n += 1
507514
if n % 1000 == 0:
508-
print 'Processing %d of %d' % (n, total), 'values', r1, r2
515+
print('Processing %d of %d' % (n, total), 'values', r1, r2)
509516
diffs = calc_diff(allmetrics, func, keys, [r1, r2])
510517
# print diffs
511518

@@ -516,13 +523,13 @@ def adapt_algorithm_2r(func, keys=[], ranges=[], expected_max=3.0, expected_mean
516523
if avg_hybrid < best_diff[0]:
517524
best_diff = [avg_hybrid, avg_mean, avg_max]
518525
best_mark = [r1, r2]
519-
print 'Best - x: %f, y: %f with hybrid %f, mean %f and max %f' %(r1, r2, avg_hybrid, avg_mean, avg_max)
526+
print('Best - x: %f, y: %f with hybrid %f, mean %f and max %f' %(r1, r2, avg_hybrid, avg_mean, avg_max))
520527
best_alldiffs = diffs
521528
else:
522529
best_diff = [avg_hybrid, avg_mean, avg_max]
523530
best_mark = [r1, r2]
524531
best_alldiffs = diffs
525-
print 'Best - x: %f, y: %f with value hybrid %f, max %f, mean %f ' %(best_mark[0], best_mark[1], best_diff[0], best_diff[1], best_diff[2])
532+
print('Best - x: %f, y: %f with value hybrid %f, max %f, mean %f ' %(best_mark[0], best_mark[1], best_diff[0], best_diff[1], best_diff[2]))
526533

527534
def adapt_algorithm_3r(func, keys=[], ranges=[], expected_max=3.0, expected_mean=1.1):
528535
best_diff = [-1, -1, -1]
@@ -535,14 +542,14 @@ def adapt_algorithm_3r(func, keys=[], ranges=[], expected_max=3.0, expected_mean
535542
total = 1
536543
for r in ranges:
537544
total *= (r[1] - r[0]) / r[2]
538-
print r
545+
print(r)
539546

540547
for r1 in arange(*ranges[0]):
541548
for r2 in arange(*ranges[1]):
542549
for r3 in arange(*ranges[2]):
543550
n += 1
544551
if n % 1000 == 0:
545-
print 'Processing %d of %d' % (n, total), 'values', r1, r2, r3
552+
print('Processing %d of %d' % (n, total), 'values', r1, r2, r3)
546553
diffs = calc_diff(allmetrics, func, keys, [r1, r2, r3])
547554
if len(diffs) == 0: continue
548555
avg_mean = mean(diffs)
@@ -552,14 +559,15 @@ def adapt_algorithm_3r(func, keys=[], ranges=[], expected_max=3.0, expected_mean
552559
if avg_hybrid < best_diff[0]:
553560
best_diff = [avg_hybrid, avg_mean, avg_max]
554561
best_mark = [r1, r2, r3]
555-
print 'Best - x: %f, y: %f, z: %f with hybrid %f, mean %f and max %f' %(r1, r2, r3, avg_hybrid, avg_mean, avg_max)
556-
# print 'Diffs', diffs
562+
print('Best - x: %f, y: %f, z: %f with hybrid %f, mean %f and max %f' % (
563+
r1, r2, r3, avg_hybrid, avg_mean, avg_max))
564+
# print 'Diffs', diffs
557565
best_alldiffs = diffs
558566
else:
559567
best_diff = [avg_hybrid, avg_mean, avg_max]
560568
best_mark = [r1, r2, r3]
561569
best_alldiffs = diffs
562-
print 'Best - x: %f, y: %f, z: %f with value hybrid %f, mean %f, max %f ' %(best_mark[0], best_mark[1], best_mark[2], best_diff[0], best_diff[1], best_diff[2])
570+
print('Best - x: %f, y: %f, z: %f with value hybrid %f, mean %f, max %f ' %(best_mark[0], best_mark[1], best_mark[2], best_diff[0], best_diff[1], best_diff[2]))
563571

564572

565573
if __name__ == "__main__":

0 commit comments

Comments
 (0)