Search in book...
Toggle Font Controls
Create new playlist

Name your new playlist

Playlist description (optional)
Sign In

Email address

Password

Forgot Password?

or

Continue with Facebook

Continue with Google
Sign Up

Full Name

Email address

Confirm Email Address

Password

or

Continue with Facebook

Continue with Google

APPENDIX B
Code for screen.py

This appendix displays the code blocks as described and discussed in Chapter 4, “Computational Thinking in Practice.” Each code block is labeled alphabetically for more convenient reference.

A

# screen.py
 
# Like in the app.py code, software modules are loaded in to help set up the
# software environment.
# No need to duplicate the code here!
 
# creates the ResultElement object, containing rank value and filename
class ResultElement:
    """One row of the ranking: a 1-based rank and the resume's file path."""

    def __init__(self, rank, filename):
        # both values are stored exactly as given by the caller
        self.rank, self.filename = rank, filename
 
# function: access and return the filepath reformatted using forward slashes
def getfilepath(loc):
    """Return *loc* as a string with backslashes turned into forward slashes.

    loc may be any object; it is converted with str() first. Paths that
    already use forward slashes are returned unchanged.
    """
    # The separator to replace is a single backslash. Replacing the empty
    # string instead would insert '/' between every character.
    return str(loc).replace('\\', '/')
 
# function: recursively retrieves resume documents that are formatted as 
# pdf, doc and docx. Each page of resume is captured separately and text
# extracted using extractText() for pdfs or textract.process() for MS Word
def res(jobfile):
    """Rank the resumes under ./Original_Resumes against a job description.

    jobfile is the name of the job-description file; it is opened later in
    this function after changing into ../Job_Description. The function
    returns a list of ResultElement objects, best match first.

    This first part of the function finds every .pdf, .doc and .docx file
    below ./Original_Resumes (recursively) and extracts its text, using
    extractText() for PDFs and textract.process() for MS Word files.
    Note that the function changes the process working directory.
    """
    # containers that are filled here and used by the later parts
    Resume_Vector = []
    Ordered_list_Resume = []
    Ordered_list_Resume_Score = []
    LIST_OF_FILES = []
    LIST_OF_FILES_PDF = []
    LIST_OF_FILES_DOC = []
    LIST_OF_FILES_DOCX = []
    Resumes = []

    # iterate through the resumes directory, separately listing the files
    # of each format
    os.chdir('./Original_Resumes')
    for file in glob.glob('**/*.pdf', recursive=True):
        LIST_OF_FILES_PDF.append(file)
    for file in glob.glob('**/*.doc', recursive=True):
        LIST_OF_FILES_DOC.append(file)
    for file in glob.glob('**/*.docx', recursive=True):
        LIST_OF_FILES_DOCX.append(file)

    # ordering files by format type gives the coder a systematic way to
    # process them: all Word documents first, then all PDFs
    LIST_OF_FILES = LIST_OF_FILES_DOC + LIST_OF_FILES_DOCX + LIST_OF_FILES_PDF

    print("This is LIST OF FILES")
    print(LIST_OF_FILES)

    # iterate through filenames and parse files, executing format-
    # specific methods
    print("####### PARSING ########")
    for nooo, i in enumerate(LIST_OF_FILES):
        # Take the real extension rather than the text after the first dot,
        # so names such as "jane.doe.pdf" or "2020.cvs/jane.pdf" still work.
        extension = os.path.splitext(i)[1].lower()
        try:
            if extension == ".pdf":
                print("This is PDF" , nooo)
                with open(i, 'rb') as pdf_file:
                    read_pdf = PyPDF2.PdfFileReader(pdf_file)
                    pages = []
                    for page_number in range(read_pdf.getNumPages()):
                        page = read_pdf.getPage(page_number)
                        # main external method, we'll look under the hood
                        # later
                        page_content = page.extractText()
                        pages.append(page_content.replace('\n', ' '))
                # one string per resume, built fresh for every file so a
                # failed PDF cannot leak text into the next one
                resume_text = ''.join(pages)
            elif extension == ".doc" or extension == ".docx":
                if extension == ".doc":
                    print("This is DOC" , i)
                else:
                    print("This is DOCX" , i)
                # main external method, we'll look under the hood later
                a = textract.process(i)
                a = a.replace(b'\n', b' ')
                a = a.replace(b'\r', b' ')
                # decode the bytes instead of str(a), which would store the
                # "b'...'" representation with escaped characters
                resume_text = a.decode('utf-8', errors='replace')
            else:
                # any other file type (e.g. .exe) is not a resume
                continue
        except Exception as e:
            # best effort: report the problem and skip this file
            print(e)
            continue

        # record the filename only once its text is available, so that
        # Ordered_list_Resume and Resumes always line up index by index
        Ordered_list_Resume.append(i)
        Resumes.append(resume_text)

    print("Done Parsing.")

B

# screen.py
 
    # read the job description that every resume is compared against
    os.chdir('../Job_Description')
    with open(jobfile, 'r') as f:
        job_text = str(f.read())

    try:
        # automate a summary of the job description by selecting a few
        # sentences/phrases to represent the entire text
        job_summary = summarize(job_text, word_count=100)
    except Exception as e:
        print(e)
        job_summary = ''
    # If no summary could be produced, use the full description. The
    # vectorizer needs a list of documents with at least one usable word;
    # a bare string such as 'None' is not valid input for fit().
    text = [job_summary or job_text]

    # create a word storage container for all non-stop words
    vectorizer = TfidfVectorizer(stop_words='english')

    # sort words from 'text' into this vectorizer container and process
    # idf values
    vectorizer.fit(text)
    # uses this vocabulary to construct tf-idf-weighted document-term
    # matrix; each word is valued by its frequency (tf) adjusted for
    # relevance (idf) in docs
    vector = vectorizer.transform(text)

    Job_Desc = vector.toarray()

    os.chdir('../')
    for i in Resumes:
        tttt = str(i)
        try:
            resume_summary = summarize(tttt, word_count=100)
        except Exception as e:
            print(e)
            resume_summary = ''
        # Every resume must yield exactly one vector, otherwise the scores
        # computed later no longer match the list of filenames. When the
        # summary is unavailable the full resume text is vectorized instead.
        vector = vectorizer.transform([resume_summary or tttt])
        Resume_Vector.append(vector.toarray())

C

# screen.py
    for i in Resume_Vector:
              samples = i
              # create a container that'll record the closest resume to
              # another resume
       neigh = NearestNeighbors(n_neighbors=1)
       # group resumes based on a resume's frequent words
             neigh.fit(samples) 
             NearestNeighbors(algorithm='auto', leaf_size=30)
Ordered_list_Resume_Score.extend(neigh.kneighbors(Job_Desc)[0][0].tolist())
 
    Z = [x for _,x in sorted(zip(Ordered_list_Resume_Score,Ordered_list_
    # Resume))]
    print(Ordered_list_Resume)
    print(Ordered_list_Resume_Score)
    flask_return = []
    # for n,i in enumerate(Z):
    #     print("Rankkkkk	" , n+1, ":	" , i)
 
    for n,i in enumerate(Z):
        # print("Rank	" , n+1, ":	" , i)
        # flask_return.append(str("Rank	" , n+1, ":	" , i))
        name = getfilepath(i)
        #name = name.split('.')[0]
        rank = n+1
        res = ResultElement(rank, name)
        flask_return.append(res)
        # res.printresult()
        print(f"Rank{res.rank+1} :	 {res.filename}")
    return flask_return
 
if __name__ == '__main__':
    # read the job-description filename from standard input and rank the
    # resumes against it; res() is the entry point defined in this file
    inputStr = input("")
    res(inputStr)

..................Content has been hidden....................

You can't read the whole page of this ebook; please click here to log in and view the full page.

Table of Contents for APPENDIX B: Code for screen.py

Create new playlist

Sign In

Sign Up

A

B

C

Table of Contents for
APPENDIX B: Code for screen.py