Comment out unused debug print statements
This commit is contained in:
@@ -13,7 +13,7 @@ from src.utils.contacts.generate_random_passport_id import get_random_passport_i
|
|||||||
def get_ordered_combins(stuff):
|
def get_ordered_combins(stuff):
|
||||||
list_to_return = []
|
list_to_return = []
|
||||||
for i, j in itertools.combinations(range(len(stuff) + 3), 2):
|
for i, j in itertools.combinations(range(len(stuff) + 3), 2):
|
||||||
print(stuff[i:j])
|
# print(stuff[i:j])
|
||||||
list_to_return.append(stuff[i:j])
|
list_to_return.append(stuff[i:j])
|
||||||
return list_to_return
|
return list_to_return
|
||||||
|
|
||||||
@@ -35,7 +35,7 @@ def read_pinyin_list_from_file() -> list:
|
|||||||
count = 0
|
count = 0
|
||||||
for line in lines:
|
for line in lines:
|
||||||
count += 1
|
count += 1
|
||||||
print("Line{}: {}".format(count, line.strip()))
|
# print("Line{}: {}".format(count, line.strip()))
|
||||||
name_list.append(line.strip())
|
name_list.append(line.strip())
|
||||||
return name_list
|
return name_list
|
||||||
|
|
||||||
@@ -47,7 +47,7 @@ def read_pinyin_first_name_from_file() -> list:
|
|||||||
count = 0
|
count = 0
|
||||||
for line in lines:
|
for line in lines:
|
||||||
count += 1
|
count += 1
|
||||||
print("Line{}: {}".format(count, line.strip()))
|
# print("Line{}: {}".format(count, line.strip()))
|
||||||
name_list.append(line.strip())
|
name_list.append(line.strip())
|
||||||
return name_list
|
return name_list
|
||||||
|
|
||||||
@@ -59,7 +59,7 @@ def read_pinyin_last_name_from_file() -> list:
|
|||||||
count = 0
|
count = 0
|
||||||
for line in lines:
|
for line in lines:
|
||||||
count += 1
|
count += 1
|
||||||
print("Line{}: {}".format(count, line.strip()))
|
# print("Line{}: {}".format(count, line.strip()))
|
||||||
name_list.append(line.strip())
|
name_list.append(line.strip())
|
||||||
return name_list
|
return name_list
|
||||||
|
|
||||||
@@ -97,8 +97,8 @@ def get_maximum_length():
|
|||||||
for first_name in all_first_name:
|
for first_name in all_first_name:
|
||||||
if len(first_name) > max_first_name_lenghth:
|
if len(first_name) > max_first_name_lenghth:
|
||||||
max_first_name_lenghth = len(first_name)
|
max_first_name_lenghth = len(first_name)
|
||||||
print("max_last_name_lenghth :" + str(max_last_name_lenghth))
|
# print("max_last_name_lenghth :" + str(max_last_name_lenghth))
|
||||||
print("max_first_name_lenghth :" + str(max_first_name_lenghth))
|
# print("max_first_name_lenghth :" + str(max_first_name_lenghth))
|
||||||
|
|
||||||
|
|
||||||
def has_numbers(inputString):
|
def has_numbers(inputString):
|
||||||
@@ -130,18 +130,18 @@ def generate_name_from_email(mail_address, pinyin_name_list):
|
|||||||
# key_words = HanLP.extractKeyword(mail_address, 2)
|
# key_words = HanLP.extractKeyword(mail_address, 2)
|
||||||
# print(key_words)
|
# print(key_words)
|
||||||
# setence = "".join(key_words)
|
# setence = "".join(key_words)
|
||||||
print("generate for " + mail_address)
|
# print("generate for " + mail_address)
|
||||||
all_combins = get_ordered_combins(mail_address)
|
all_combins = get_ordered_combins(mail_address)
|
||||||
all_combins.sort(key=len, reverse=True)
|
all_combins.sort(key=len, reverse=True)
|
||||||
no_duplicated_list = []
|
no_duplicated_list = []
|
||||||
for word in all_combins:
|
for word in all_combins:
|
||||||
if word not in no_duplicated_list:
|
if word not in no_duplicated_list:
|
||||||
no_duplicated_list.append(word)
|
no_duplicated_list.append(word)
|
||||||
print(all_combins)
|
# print(all_combins)
|
||||||
|
|
||||||
for i in no_duplicated_list:
|
for i in no_duplicated_list:
|
||||||
word_to_test = "".join(i)
|
word_to_test = "".join(i)
|
||||||
print("word to test is " + word_to_test)
|
# print("word to test is " + word_to_test)
|
||||||
# if len(word_to_test) >= 11:
|
# if len(word_to_test) >= 11:
|
||||||
# found_name = check_name(word_to_test=word_to_test, pinyin_name_list=pinyin_name_list)
|
# found_name = check_name(word_to_test=word_to_test, pinyin_name_list=pinyin_name_list)
|
||||||
# if found_name is not None:
|
# if found_name is not None:
|
||||||
@@ -250,17 +250,17 @@ def filter_already_validated_contacts(_list_to_extract):
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
start_position = 14500
|
start_position = 1
|
||||||
end_position = 15084
|
end_position = 502
|
||||||
# x = range(start_position, end_position, 100)
|
# x = range(start_position, end_position, 100)
|
||||||
step = 100
|
step = 500
|
||||||
step_list = range(start_position, end_position, step)
|
step_list = range(start_position, end_position, step)
|
||||||
print(step_list[-1])
|
print(step_list[-1])
|
||||||
for x in step_list:
|
for x in step_list:
|
||||||
print(x)
|
print(x)
|
||||||
_from_position = x
|
_from_position = x
|
||||||
_end_position = x + step
|
_end_position = x + step
|
||||||
print("_end_position is {}".format(_end_position))
|
print("_from_position is {},_end_position is {}".format(_from_position, _end_position))
|
||||||
if _end_position <= step_list[-1]:
|
if _end_position <= step_list[-1]:
|
||||||
print("start extraction from {} to {}".format(_from_position, _end_position))
|
print("start extraction from {} to {}".format(_from_position, _end_position))
|
||||||
generate_contact_from_mail_list(_from_position, _end_position)
|
generate_contact_from_mail_list(_from_position, _end_position)
|
||||||
|
|||||||
Reference in New Issue
Block a user