from concurrent.futures import ProcessPoolExecutor
import os
import threading
import time
from faker import Faker
import csv
fake = Faker()
# Directory that receives the generated CSV files.
csv_save_dir = './save_csv/'
# Size of the process pool; also the number of output files produced.
worker_count = 5
# Number of data rows written to each output file.
per_row = 20000
# Base file name; each output file is saved as "<index>_<file_name>".
file_name = "test.txt"
# Indices handed to the workers, one per output file.
file_num = list(range(worker_count))
# exist_ok=True removes the check-then-create race: this module-level code can
# run in several processes at once (e.g. when workers re-import the module),
# and a separate exists() check could then fail with FileExistsError.
os.makedirs(csv_save_dir, exist_ok=True)
def write_fake_data(file_num):
    """Write ``per_row`` fake profile rows to one CSV file.

    The file is created at ``<csv_save_dir>/<file_num>_<file_name>`` and
    starts with a header row, followed by one row per generated profile.

    Args:
        file_num: Index used as the prefix of the output file name.
    """
    csv_file_save_path = os.path.join(csv_save_dir, f"{file_num}_{file_name}")
    # NOTE(review): `fake` is created once at import time; if workers are
    # started by fork they may inherit identical RNG state and emit identical
    # rows in every file -- confirm, and reseed per worker if that matters.
    with open(csv_file_save_path, 'w', newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["name", "ssn", "address", "blood_group", "job", "company", "residence", "sex", "mail", "birthdate"])
        for _ in range(per_row):
            # Generate the profile ONCE per row. Calling fake.profile() for
            # every column built ten full profiles per row and mixed fields
            # from unrelated people into the same record.
            profile = fake.profile()
            writer.writerow([
                profile["name"],
                profile["ssn"],
                # Embedded newlines are stripped so each record stays on one line.
                profile["address"].replace("\n", ""),
                profile["blood_group"],
                profile["job"],
                profile["company"].replace("\n", ""),
                profile["residence"].replace("\n", ""),
                profile["sex"],
                profile["mail"],
                profile["birthdate"],
            ])
    print(f"Thread={threading.get_ident()}, Process={os.getpid()} CSV File saved...")
def main():
    """Generate all CSV files in parallel, one task per entry in ``file_num``.

    Raises:
        Exception: whatever a worker raised; it is re-raised here when that
            worker's result is retrieved.
    """
    with ProcessPoolExecutor(max_workers=worker_count) as executor:
        # executor.map() only raises a worker's exception when its result is
        # retrieved from the returned iterator. Drain the iterator so a failed
        # worker is reported instead of being silently ignored.
        for _ in executor.map(write_fake_data, file_num):
            pass
if __name__ == '__main__':
    # Time the whole multi-process run with a monotonic high-resolution clock.
    started_at = time.perf_counter()
    main()
    elapsed = time.perf_counter() - started_at
    print(f'Finished in {round(elapsed, 2)} second(s)')
Thread=140308890367808, Process=25978 CSV File saved...
Thread=140308890367808, Process=25979 CSV File saved...
Thread=140308890367808, Process=25980 CSV File saved...
Thread=140308890367808, Process=25976 CSV File saved...
Thread=140308890367808, Process=25977 CSV File saved...
Finished in 341.58 second(s)
5개의 process로 10만 건(process당 2만 건)의 데이터를 생성하는 데 약 341초가 걸렸다.
from faker import Faker
import csv
import time
# Single-process baseline: write 100,000 fake profile rows to test.txt and
# report the elapsed wall-clock time.
fake = Faker()
start = time.perf_counter()
# newline="" is what the csv module asks for on files it writes; without it,
# platforms that translate "\n" can end up with an extra "\r" in each row.
with open("test.txt", 'w', newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["name", "ssn", "address", "blood_group", "job", "company", "residence", "sex", "mail", "birthdate"])
    for _ in range(100000):
        # Generate the profile ONCE per row. Calling fake.profile() for every
        # column built ten full profiles per row and mixed fields from
        # unrelated people into the same record.
        profile = fake.profile()
        writer.writerow([
            profile["name"],
            profile["ssn"],
            # Embedded newlines are stripped so each record stays on one line.
            profile["address"].replace("\n", ""),
            profile["blood_group"],
            profile["job"],
            profile["company"].replace("\n", ""),
            profile["residence"].replace("\n", ""),
            profile["sex"],
            profile["mail"],
            profile["birthdate"],
        ])
finish = time.perf_counter()
print(f'Finished in {round(finish-start, 2)} second(s)')
Finished in 1674.05 second(s)
1개의 process로 10만 건의 데이터를 생성하는 데 약 1674초가 걸렸다 (5개 process 대비 약 4.9배 느림).
'python' 카테고리의 다른 글
이것저것 (0) | 2023.01.27 |
---|---|
report 생성 (0) | 2023.01.23 |
python single line to multi line (0) | 2023.01.18 |
aiosmtpd (1) | 2022.12.29 |
python fake data (mimesis) (0) | 2022.09.10 |
댓글