Disable unused debug print statements by commenting them out
This commit is contained in:
@@ -13,7 +13,7 @@ from src.utils.contacts.generate_random_passport_id import get_random_passport_i
|
||||
def get_ordered_combins(stuff):
|
||||
list_to_return = []
|
||||
for i, j in itertools.combinations(range(len(stuff) + 3), 2):
|
||||
print(stuff[i:j])
|
||||
# print(stuff[i:j])
|
||||
list_to_return.append(stuff[i:j])
|
||||
return list_to_return
|
||||
|
||||
@@ -35,7 +35,7 @@ def read_pinyin_list_from_file() -> list:
|
||||
count = 0
|
||||
for line in lines:
|
||||
count += 1
|
||||
print("Line{}: {}".format(count, line.strip()))
|
||||
# print("Line{}: {}".format(count, line.strip()))
|
||||
name_list.append(line.strip())
|
||||
return name_list
|
||||
|
||||
@@ -47,7 +47,7 @@ def read_pinyin_first_name_from_file() -> list:
|
||||
count = 0
|
||||
for line in lines:
|
||||
count += 1
|
||||
print("Line{}: {}".format(count, line.strip()))
|
||||
# print("Line{}: {}".format(count, line.strip()))
|
||||
name_list.append(line.strip())
|
||||
return name_list
|
||||
|
||||
@@ -59,7 +59,7 @@ def read_pinyin_last_name_from_file() -> list:
|
||||
count = 0
|
||||
for line in lines:
|
||||
count += 1
|
||||
print("Line{}: {}".format(count, line.strip()))
|
||||
# print("Line{}: {}".format(count, line.strip()))
|
||||
name_list.append(line.strip())
|
||||
return name_list
|
||||
|
||||
@@ -97,8 +97,8 @@ def get_maximum_length():
|
||||
for first_name in all_first_name:
|
||||
if len(first_name) > max_first_name_lenghth:
|
||||
max_first_name_lenghth = len(first_name)
|
||||
print("max_last_name_lenghth :" + str(max_last_name_lenghth))
|
||||
print("max_first_name_lenghth :" + str(max_first_name_lenghth))
|
||||
# print("max_last_name_lenghth :" + str(max_last_name_lenghth))
|
||||
# print("max_first_name_lenghth :" + str(max_first_name_lenghth))
|
||||
|
||||
|
||||
def has_numbers(inputString):
|
||||
@@ -130,18 +130,18 @@ def generate_name_from_email(mail_address, pinyin_name_list):
|
||||
# key_words = HanLP.extractKeyword(mail_address, 2)
|
||||
# print(key_words)
|
||||
# setence = "".join(key_words)
|
||||
print("generate for " + mail_address)
|
||||
# print("generate for " + mail_address)
|
||||
all_combins = get_ordered_combins(mail_address)
|
||||
all_combins.sort(key=len, reverse=True)
|
||||
no_duplicated_list = []
|
||||
for word in all_combins:
|
||||
if word not in no_duplicated_list:
|
||||
no_duplicated_list.append(word)
|
||||
print(all_combins)
|
||||
# print(all_combins)
|
||||
|
||||
for i in no_duplicated_list:
|
||||
word_to_test = "".join(i)
|
||||
print("word to test is " + word_to_test)
|
||||
# print("word to test is " + word_to_test)
|
||||
# if len(word_to_test) >= 11:
|
||||
# found_name = check_name(word_to_test=word_to_test, pinyin_name_list=pinyin_name_list)
|
||||
# if found_name is not None:
|
||||
@@ -250,17 +250,17 @@ def filter_already_validated_contacts(_list_to_extract):
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
start_position = 14500
|
||||
end_position = 15084
|
||||
start_position = 1
|
||||
end_position = 502
|
||||
# x = range(start_position, end_position, 100)
|
||||
step = 100
|
||||
step = 500
|
||||
step_list = range(start_position, end_position, step)
|
||||
print(step_list[-1])
|
||||
for x in step_list:
|
||||
print(x)
|
||||
_from_position = x
|
||||
_end_position = x + step
|
||||
print("_end_position is {}".format(_end_position))
|
||||
print("_from_position is {},_end_position is {}".format(_from_position, _end_position))
|
||||
if _end_position <= step_list[-1]:
|
||||
print("start extraction from {} to {}".format(_from_position, _end_position))
|
||||
generate_contact_from_mail_list(_from_position, _end_position)
|
||||
|
||||
Reference in New Issue
Block a user