Skip to content Skip to sidebar Skip to footer

For Loop For List Of Objects With Use Multithread In Python

I am new to Python. I have a program that loads one big CSV file with over 100k lines, each line having 4 columns. In a FOR loop I check, for each row, the same duplicated list (dlist), …

Solution 1:

There were a few problems in the code: you didn't parallelize it, and you can't just run single-threaded code with a heavy task on multiple cores. The code requires some adaptation.

Ok, anyway, here we are :)

from math import ceil
from multiprocessing import Pool, cpu_count, freeze_support


def get_red(val):
    """Return ``val`` unchanged.

    ``DsRef.__str__`` passes each ``|`` separator through this function.
    NOTE(review): the name suggests it once wrapped text in a red colour
    code; here it is an identity function.
    """
    return val


class DsRef:
    """A single record made of five fields.

    The fields are stored exactly as given: ``pn``, ``comp``,
    ``comp_name``, ``type`` and ``diff``.
    """

    def __init__(self, pn, comp, comp_name, type, diff):
        """Store the five field values on the instance unchanged."""
        self.pn = pn
        self.comp = comp
        self.comp_name = comp_name
        # ``type`` shadows the builtin, but the parameter name is part of
        # the constructor's interface and is kept for callers.
        self.type = type
        self.diff = diff

    def __str__(self):
        """Return the five fields joined by ``|`` with a trailing newline."""
        # Every separator goes through get_red(), as in the original.
        sep = get_red("|")
        return f'{self.pn}{sep}{self.comp}{sep}{self.comp_name}{sep}{self.type}{sep}{self.diff}\n'

    def __repr__(self):
        """Return the same text as ``str(self)``."""
        return str(self)


class Duplication:
    """A ``(pn, comp)`` key together with the number of times it occurs.

    Equality and hashing use only ``pn`` and ``comp``; ``cnt`` is ignored,
    so two instances with the same key collapse into one set element.
    """

    def __init__(self, pn, comp, cnt):
        """Store the key fields ``pn`` and ``comp`` and the count ``cnt``."""
        self.pn = pn
        self.comp = comp
        self.cnt = cnt

    def __str__(self):
        """Return ``pn;comp;cnt`` followed by a newline."""
        return f'{self.pn};{self.comp};{self.cnt}\n'

    def __repr__(self):
        """Return the same text as ``str(self)``."""
        return str(self)

    def __hash__(self):
        """Hash on ``pn`` and ``comp`` only, consistent with ``__eq__``."""
        return hash(('pn', self.pn,
                     'comp', self.comp))

    def __eq__(self, other):
        """Compare by ``pn`` and ``comp``.

        Returns ``NotImplemented`` when ``other`` lacks those attributes,
        so comparing with an unrelated object yields ``False`` instead of
        raising ``AttributeError``.
        """
        try:
            return self.pn == other.pn and self.comp == other.comp
        except AttributeError:
            return NotImplemented


# Sample data: eleven DsRef records that differ only in their (pn, comp)
# pair; the remaining three fields are the same for every record.
dlist = [
    DsRef(pn, comp, "CTYPE", "CTYPE", "text")
    for pn, comp in (
        ("TTT_XXX", "CCC_VVV"),
        ("TTT_XCX", "CCC_VVV"),
        ("TTT_XXX", "CCC_VCV"),
        ("TTT_XXX", "CCC_VVV"),
        ("TTT_XYX", "CCC_YYY"),
        ("TAT_XQX", "CCC_VVV"),
        ("ATT_XXX", "CCC_VQV"),
        ("TTT_EEE", "CCC_VVV"),
        ("TTT_XWX", "CCC_VVV"),
        ("TTT_XXX", "CCC_VWV"),
        ("TTT_EEE", "CCC_VVV"),
    )
]


def FindDuplications(task):
    """Find the duplicated ``(pn, comp)`` keys within one chunk of rows.

    Args:
        task: A ``(dlist, start, count)`` tuple. ``dlist`` is the full list
            of rows (each exposing ``pn`` and ``comp``); the chunk examined
            is ``dlist[start:start + count]``.

    Returns:
        A set of ``Duplication`` objects, one per distinct ``(pn, comp)``
        key in the chunk that occurs more than once in the whole ``dlist``.
        ``cnt`` is the number of occurrences in the whole list.
    """
    # Function-scope import so the block does not depend on file-level imports.
    from collections import Counter

    dlist, start, count = task

    # Count every key once instead of rescanning dlist for each row of the
    # chunk. Assumes pn and comp are hashable (they are strings in the
    # sample data).
    occurrences = Counter((row.pn, row.comp) for row in dlist)

    return {
        Duplication(row.pn, row.comp, occurrences[(row.pn, row.comp)])
        for row in dlist[start:start + count]
        if occurrences[(row.pn, row.comp)] > 1
    }


if __name__ == '__main__':
    # Script entry point: split dlist into one chunk per CPU core, look for
    # duplicates in every chunk in a separate worker process, then merge
    # the per-chunk sets.
    freeze_support()

    threads = cpu_count()
    # Round up so the chunks cover the whole list; trailing chunks may be
    # short or empty when len(dlist) is not a multiple of the core count.
    tasks_per_thread = ceil(len(dlist) / threads)

    tasks = [(dlist, tasks_per_thread * i, tasks_per_thread) for i in range(threads)]

    # map() blocks until every chunk is processed, so all results are in
    # hand before the context manager shuts the pool down. The with-block
    # also releases the workers if map() raises.
    with Pool(threads) as p:
        duplicates = p.map(FindDuplications, tasks)

    # Flatten the per-chunk sets; Duplication hashes on (pn, comp), so a key
    # reported by several chunks appears only once.
    duplicates = {item for sublist in duplicates for item in sublist}

    print(duplicates)
    print(type(duplicates))

It works well for me, returns the same results as the single-threaded function, and runs on all available cores in parallel.

Output

python test.py
{TTT_EEE;CCC_VVV;2
, TTT_XXX;CCC_VVV;2
}
<class 'set'>

Post a Comment for "For Loop For List Of Objects With Use Multithread In Python"