minedujob

minedujob

Python script to fetch a random job from the Ministry of Education and convert it into a JSON object. Merges in data from DigitalNZ.

In [167]:
import requests
import untangle
import xmltodict
import json
import random
import bs4
import dominate
from dominate.tags import *
from pydnz import Dnz
import arrow
import bs4
In [168]:
# Create the DigitalNZ API client that is used for the record search further down.
# NOTE(review): 'keyhere' looks like a placeholder — supply a real key before
# running, ideally read from the environment rather than written as a literal.
dnz = Dnz('keyhere')
In [169]:
# Download the Ministry of Education jobs RSS feed.
# NOTE(review): no timeout or status check is applied to this request.
jobreq = requests.get('https://jobs.minedu.govt.nz/jobtools/job_rss?o1=17584&k2=A52B3674BC046465&source=JobRSS&medium=JobRSS')
In [170]:
# Keep the response body as text; both XML parsers below read from it.
jobtxta = jobreq.text
In [171]:
# Parse the feed with untangle.
# NOTE(review): `obj` is not referenced again in the visible notebook; the
# xmltodict result (`dicjobz`) is what the later cells use.
obj = untangle.parse(jobtxta)
In [172]:
# Parse the same XML into nested dicts; the job entries are read from
# dicjobz['rss']['channel']['item'] in later cells.
dicjobz = xmltodict.parse(jobtxta)
In [173]:
# Number of job items in the feed.
# NOTE(review): assumes 'item' is a list (i.e. the feed has more than one job) — confirm.
ranldicj = len(dicjobz['rss']['channel']['item'])
In [174]:
ranldicj
Out[174]:
27
In [175]:
# Pick a random index into the feed items.
# random.randrange(n) yields 0..n-1. The previous random.randint(0, n) is
# inclusive of n, so it could return an index one past the last item and make
# the item lookup below raise IndexError.
randicz = random.randrange(ranldicj)
In [176]:
randicz
Out[176]:
15
In [177]:
# Select the randomly chosen feed item.
dicrs = dicjobz['rss']['channel']['item'][randicz]
In [178]:
# `dicrtq` is a second name for the chosen feed item (same object as `dicrs`);
# `dicrts` holds that item's title.
dicrtq = dicrs
dicrts = dicrtq['title']
In [179]:
# Timestamp used for the 'date advertised' / 'time advertised' fields below.
# NOTE(review): arrow.now() takes a timezone, not a date string, so this looks
# like it yields the current time rather than the item's pubDate (the printed
# time below carries microseconds, consistent with "now"). Parsing pubDate with
# arrow.get() and the feed's date format is probably what was intended — confirm.
artim = arrow.now(dicrtq['pubDate'])
In [180]:
# Show the date part of the timestamp.
print(artim.date())
2015-07-12
In [181]:
# Show the time-of-day part of the timestamp.
print(artim.time())
09:39:49.250847
In [182]:
artim.weekday()
Out[182]:
6
In [183]:
# Accumulator for the fields collected about the selected job.
msjobdic = {}
In [184]:
# Record the timestamp (split into date and time strings) and the job title.
msjobdic['date advertised'] = str(artim.date())
msjobdic['time advertised'] = str(artim.time())
msjobdic['title'] = dicrts
In [185]:
msjobdic
Out[185]:
{'date advertised': '2015-07-12',
 'time advertised': '09:39:49.250847',
 'title': u'Executive Support Officer'}
In [186]:
# The job's link, taken from the chosen feed item.
requlink = dicrtq['link']
In [187]:
# Fetch the linked job page.
reqlinkq = requests.get(requlink)
In [188]:
# Parse the job page's HTML.
# NOTE(review): no parser is named here, so BeautifulSoup presumably picks
# whichever one is installed — consider passing one explicitly for repeatable results.
bsoup = bs4.BeautifulSoup(reqlinkq.text)
In [189]:
# Collect every anchor tag on the job page.
bfina = bsoup.find_all('a')
In [190]:
#msjobdic.update({'date advertised' : str(artim.date()), 
#                'time advertised' : str(artim.time()),
#                'title' : dicrts,
#                '})
# Store the text of an anchor containing '@' under 'email'.
# If several anchors match, the last one wins.
for bfin in bfina:
    if '@' not in bfin.text:
        continue
    msjobdic['email'] = str(bfin.text)
        
In [191]:
# Print the text of every anchor that mentions a .docx file.
for bfiny in bfina:
    if '.docx' not in bfiny.text:
        continue
    print(bfiny.text)
In [192]:
#Search for this file and render text.
#if jpg/gif render.
In [193]:
# Store the text of an anchor whose visible text contains 'href'.
# NOTE(review): this tests the anchor's text, not its href attribute, and the
# dict shown further down has no 'href' key — the attribute was probably
# intended; confirm before relying on this field.
for bfin in bfina:
    if 'href' not in bfin.text:
        continue
    msjobdic['href'] = str(bfin.text)
In [194]:
#msjob
In [195]:
# Remember which feed index was chosen.
msjobdic['randnum'] = randicz
In [196]:
# Store the text of an anchor mentioning a .docx file under 'doc'.
# If several anchors match, the last one wins.
for bfiny in bfina:
    if '.docx' not in bfiny.text:
        continue
    msjobdic['doc'] = bfiny.text
In [197]:
msjobdic
Out[197]:
{'date advertised': '2015-07-12',
 'randnum': 15,
 'time advertised': '09:39:49.250847',
 'title': u'Executive Support Officer'}
In [198]:
# Add the job's link from the feed item.
msjobdic['link'] = dicrtq['link']
In [199]:
msjobdic
Out[199]:
{'date advertised': '2015-07-12',
 'link': u'http://jobs.minedu.govt.nz/jobtools/jncustomsearch.viewFullSingle?in_organid=17584&in_jnCounter=222576690&in_jobDate=All&in_searchbox=YES&in_summary=S#utm_source=JobRSS&utm_medium=JobRSS',
 'randnum': 15,
 'time advertised': '09:39:49.250847',
 'title': u'Executive Support Officer'}
In [199]:
 
In [200]:
#msjobdic.update({'doc' : b
In [201]:
dicrtq['link']
Out[201]:
u'http://jobs.minedu.govt.nz/jobtools/jncustomsearch.viewFullSingle?in_organid=17584&in_jnCounter=222576690&in_jobDate=All&in_searchbox=YES&in_summary=S#utm_source=JobRSS&utm_medium=JobRSS'
In [202]:
dicrts
Out[202]:
u'Executive Support Officer'
In [203]:
# Search DigitalNZ for records matching the job title.
# The query must be the title string (`dicrts`); the previous call passed the
# whole feed item dict (`dicrs`), which is not a usable search term.
debsnz = dnz.search(dicrts)
In [204]:
# Number of records returned by the DigitalNZ search.
randrecord = len(debsnz.records)
In [205]:
# Pick a random index into the search results.
# random.randint(0, n) is inclusive of n and could return an index one past the
# last record; randrange(n) stays within 0..n-1. When the search returned no
# records the index stays 0, as before.
ranitdz = random.randrange(randrecord) if randrecord else 0
In [206]:
ranitdz
Out[206]:
0
In [207]:
randicz
Out[207]:
15
In [208]:
#debsnz.records
In [209]:
#debrecintz = debsnz.records[ranitdz]
In [210]:
#kederz = debrecintz.keys()
In [211]:
#print debrecintz['category']
#print debrecintz['usage']
In [212]:
#for ked in kederz:
#    print ked
#    print debrecintz[ked]
    #print ked
    #print ked
    #print debrecintz['category']
In [213]:
#print debrecintz['id']
In [214]:
#getiddnz = ('http://api.digitalnz.org/v3/records/' + str(debrecintz['id']) + '.json?api_key=YOUR_API_KEY')
In [215]:
#getiddnz
In [216]:
#reqidnz = requests.get(getiddnz)
In [217]:
#json.dumps(reqidnz)
In [218]:
# Empty list; not filled anywhere in the visible notebook.
mylirq = []
In [219]:
#for reqi in reqidnz:
    #print reqi
    #print reqi.upper()
    #reqi
In [220]:
#my_dict.pop("key", None)
In [221]:
# Number of job items in the feed (same expression as `ranldicj` earlier).
dicrq = len(dicjobz['rss']['channel']['item'])
In [222]:
dicrq
Out[222]:
27
In [223]:
#Return a random job.
In [224]:
# All job items from the feed.
dicrsch = dicjobz['rss']['channel']['item']

# Random valid index into `dicrsch`. randrange(n) yields 0..n-1, whereas
# random.randint(0, n) is inclusive of n and would intermittently raise
# IndexError when the item is looked up.
ranjoz = random.randrange(dicrq)
In [225]:
# Show the selected job's link.
print(dicrsch[ranjoz]['link'])
http://jobs.minedu.govt.nz/jobtools/jncustomsearch.viewFullSingle?in_organid=17584&in_jnCounter=222578149&in_jobDate=All&in_searchbox=YES&in_summary=S#utm_source=JobRSS&utm_medium=JobRSS
In [226]:
# Show the selected job's title.
print(dicrsch[ranjoz]['title'])
Practice Advisors
In [227]:
#jobtype
#location
#date advertised
#jobreference
#jobtitle
#should be keys
#Currently they are inside description key
#Create new json file that fixes this.
In [228]:
#for dezsr in  dicrsch[ranjoz]['description']:
##    if 'JobType' in dezs#r:
# #       print dezsr
In [229]:
#docstart.title = ('ministry-of-education-jobs')
# Build a small HTML page for the randomly selected job (index `ranjoz`).
doc = dominate.document(title='ministry-of-education-jobs')

with doc.head:
    link(rel='stylesheet', href='style.css')
    script(type='text/javascript', src='script.js')

# Every field on the page comes from this one item, so the heading, date and
# link all describe the same job. The previous version mixed in `dicrs`, which
# is the item chosen earlier with a different random index.
jobsel = dicrsch[ranjoz]

with doc:
    with div(cls='row'):
        h1('education-counts-jobs')
        h2(jobsel['title'])
        # dominate treats a dict argument as HTML attributes rather than
        # content, so passing the feed item itself produced an empty <p> and a
        # link with no text. Pass the strings to display explicitly instead.
        p(jobsel['pubDate'])
        p(a(jobsel['title'], href=jobsel['link']))
        
        #for ked in kederz:
        #print ked
        #    p((kederz[ked]))
        #print ked
        #print ked

        
In [230]:
#print doc

# Render the page to a string and strip any non-ASCII characters before writing.
docre = doc.render()
yourstring = docre.encode('ascii', 'ignore').decode('ascii')
indfil = ('/home/wcmckee/minedujob/index.html')
# The with-block closes the file even if the write raises, so the handle
# cannot leak.
with open(indfil, 'w') as mkind:
    mkind.write(yourstring)
In [231]:
# Reopen the generated index.html in append mode (same path as `indfil`).
opeind = open('/home/wcmckee/minedujob/index.html', 'a')
In [232]:
# Append the selected job's description after the rendered page.
# Non-ASCII characters are stripped in the same way as for the rendered page
# above, so this write cannot fail on characters the plain file handle is
# unable to encode.
opeind.write(dicrsch[ranjoz]['description'].encode('ascii', 'ignore').decode('ascii'))
In [233]:
# Close the append handle so the description is flushed to disk.
opeind.close()
In [233]:
 
In [ ]:
 

Comments

Comments powered by Disqus