zalandobrands
zalando brands
Python script to interact with Zalando brands and articles.
Improve the brands API with more data from the Zalando brand page and other sources (Giphy, DuckDuckGo).
Analyze the description text for each brand. Create image stories with keywords.
Fix the text that failed to parse.
Create a Flask JSON API with new and updated brands: title, slug, tags, logo, images.
In [3]:
import getpass
import requests
import shutil
import arrow
import bs4
import nltk
from nltk.tag import pos_tag
import random
In [6]:
# Timestamp taken once; later cells format it into the `.. date:` field of
# every .meta file they write.
curtim = arrow.now()
In [7]:
# Login name of the current user, used to build paths under /home.
myusr = getpass.getuser()
In [8]:
# Base project directory for this user.
zalbr = ('/home/{}/zalando/'.format(myusr))
In [9]:
# `galleries/` subdirectory of the base directory.
zalgal = zalbr + 'galleries/'
In [10]:
# First request to the brands endpoint with no query parameters.
branreq = requests.get('https://api.zalando.com/brands')
In [11]:
# Decoded JSON body; the cells below read its 'totalElements', 'content'
# and 'totalPages' keys.
branjs = branreq.json()
In [12]:
# presumably the total number of brands across all pages -- TODO confirm
totele = branjs['totalElements']
In [13]:
# Number of entries in the 'content' list of this response.
brnconl = len(branjs['content'])
In [14]:
branjs
Out[14]:
In [15]:
brnconl
Out[15]:
In [16]:
# Value of 'totalPages'; reused later as the pageSize query parameter.
totpag = branjs['totalPages']
In [17]:
zalgal
Out[17]:
In [18]:
# Download one brand page and pull the first paragraph out of its
# <div id="description"> block, as a manual probe of the page layout.
lilwhi = requests.get('https://www.zalando.co.uk/little-white-lies/')
lilw = lilwhi.text
# Name the parser explicitly: without it BeautifulSoup picks whichever parser
# happens to be installed, so the same page can parse differently per machine.
bsliw = bs4.BeautifulSoup(lilw, 'html.parser')
bsliw
desfil = bsliw.find('div', id='description')
# NOTE(review): find() returns None when the div is absent, in which case the
# next line raises AttributeError -- acceptable for this interactive probe.
desp = desfil.p
desp.text
In [19]:
#deshe = desfil.h2
In [20]:
# First <p> element inside the description div found above.
desp = desfil.p
In [21]:
# Split the paragraph text into word and punctuation tokens.
toktx = nltk.word_tokenize(desp.text)
In [22]:
# Wrap the token list in an nltk.Text so concordance() can be called on it.
nlmytx = nltk.Text(toktx)
In [23]:
# Show the occurrences of 'Your' in the tokenised text.
nlmytx.concordance('Your')
In [24]:
toktx
In [25]:
# Print every key of blogbrdic.
# NOTE(review): blogbrdic is only created in a later cell (In [29]) and filled
# in In [31]; the execution counters suggest the cells were run out of order.
for blogb in blogbrdic:
print(blogb)
In [26]:
# Number of entries currently stored in blogbrdic.
len(blogbrdic)
In [27]:
# Print every entry stored in blogbrdic.
# blogbrdic is a dict keyed by brand slug strings, so it cannot be indexed by
# integer position (blogbrdic[0] raises KeyError); iterate its values instead.
for ble in blogbrdic.values():
    print(ble)
In [28]:
desp.text
In [29]:
# Maps brand slug -> placeholder string or {'description', 'header'} dict;
# filled by the brand loop in In [31].
blogbrdic = dict()
In [30]:
# Collects every scraped description text, in request order.
bloglisn = list()
In [31]:
# Fetch the brand list in one request, then for each brand: register its slug
# in blogbrdic, scrape the description block from its shop page, and create
# its post file plus a .meta file with reST-style metadata fields.
# NOTE(review): pageSize is filled with totpag (the API's 'totalPages' value),
# not an element count -- confirm this really returns every brand.
reqph = requests.get('https://api.zalando.com/brands?pageSize={}'.format(totpag))
alljs = reqph.json()
brnconz = len(alljs['content'])
for brand in alljs['content']:
    namsl = brand['name']
    namcd = brand['key']
    print(namcd)
    namsq = namsl.replace(' ', '-')
    # Deliberately left as a bound method rather than a string: disabled
    # snippets elsewhere in this notebook still call it as mamsq().
    mamsq = namsq.lower
    print(mamsq())
    # Placeholder entry; replaced below once a description has been scraped.
    blogbrdic.update({mamsq(): 'test'})

    # Only the two lookups that can legitimately be missing sit in the try,
    # so an unrelated KeyError further down is no longer silently skipped.
    try:
        logurl = brand['logoUrl']
        shurl = brand['shopUrl']
    except KeyError:
        continue
    print(logurl)
    print(shurl)

    lilwhi = requests.get(shurl)
    lilw = lilwhi.text
    # Explicit parser so the result does not depend on what is installed.
    bsliw = bs4.BeautifulSoup(lilw, 'html.parser')
    desfil = bsliw.find('div', id='description')

    # Recompute both elements for every brand. Previously a missing div was
    # swallowed with `except AttributeError: pass`, so the text of the
    # *previous* brand was recorded under this one (or NameError was raised
    # on the first brand), and a missing <p>/<strong> crashed on `.text`.
    desp = desfil.p if desfil is not None else None
    depa = desfil.strong if desfil is not None else None
    if desp is not None and depa is not None:
        print(depa.text)
        print(desp.text)
        blogbrdic.update({mamsq(): {'description': desp.text, 'header': depa.text}})
        bloglisn.append(desp.text)

    # The .md file is opened in 'w' mode only to create/empty it; nothing is
    # written to it here. The .meta file receives the metadata header.
    with open('/home/{}/zalando/posts/{}.md'.format(myusr, namcd), 'w') as oppost, \
            open('/home/{}/zalando/posts/{}.meta'.format(myusr, namcd), 'w') as opmetat:
        opmetat.write('.. title: {}\n.. slug: {}\n.. date: {}\n.. tags: \n.. link:\n.. description:\n.. type: text'.format(namcd, namcd, curtim))
In [275]:
# NOTE(review): `Generator` is not imported or defined anywhere in the visible
# notebook -- confirm where it comes from.
g = Generator(bloglisn)
In [282]:
# Join all scraped descriptions into a single space-separated string.
myblog = ' '.join(bloglisn)
In [287]:
# NOTE(review): this stores the bound method itself; split is never called.
nlblog = (myblog.split)
In [371]:
# Remove every comma from the combined text.
fixcom = myblog.replace(',', '')
In [375]:
# Then remove every full stop as well.
foxcom = fixcom.replace('.', '')
In [378]:
foxcom
Out[378]:
In [354]:
# NOTE(review): nltk.Text is given a plain str here, whereas In [22] passes a
# token list -- confirm this is intended.
bltxe = nltk.Text(myblog)
In [382]:
# Part-of-speech tag the whitespace-separated words of the cleaned text;
# yields (word, tag) pairs.
post = pos_tag(foxcom.split())
In [383]:
post
Out[383]:
In [319]:
# Words tagged 'JJ' (presumably adjectives in the tagger's tagset -- verify).
jjword = [word for word,pos in post if pos == 'JJ']
In [326]:
# De-duplicated 'JJ' words.
sejj = set(jjword)
In [336]:
# Strip commas and full stops from every word in sejj.
# The earlier loop called sej.replace(...) and threw the result away (strings
# are immutable, so nothing changed), and its '.' branch replaced ',' again.
# Rebuild the set from the cleaned words instead.
sejj = {sej.replace(',', '').replace('.', '') for sej in sejj}
In [298]:
# Words tagged 'NN'.
# NOTE(review): despite the name, 'NN' is presumably the common-noun tag and
# proper nouns would be 'NNP' -- verify against the tagger's tagset.
propernouns = [word for word,pos in post if pos == 'NN']
In [351]:
# Inspect nouns that still carry punctuation.
# NOTE(review): the result of sej.replace(',', '') is discarded, so the comma
# branch changes nothing; only the '.' branch produces output.
for sej in propernouns:
if ',' in sej:
#print(', is here')
sej.replace(',', '')
elif '.' in sej:
print('. is here')
#sej.replace('.', '')
#print(sej)
# strip('.') removes leading/trailing full stops only, not interior ones.
s = sej.strip('.')
print(s)
In [341]:
# De-duplicated 'NN' words.
ppset = set(propernouns)
In [302]:
# Words tagged 'VB' (presumably base-form verbs -- verify).
doword = [word for word,pos in post if pos == 'VB']
In [316]:
# NOTE(review): the cleaned values are reassigned on every pass and never
# stored, so after the loop only the last word's result remains.
for prpno in propernouns:
prprep = (prpno.replace(',', ''))
prprez = (prprep.replace('.', ''))
In [310]:
# Five distinct random picks; raises ValueError if fewer than five items.
random.sample(propernouns, 5)
Out[310]:
In [311]:
random.sample(doword, 5)
Out[311]:
Out[311]:
In [455]:
'''
for totp in range(1, totpag):
reqph = requests.get('https://api.zalando.com/brands?page={}'.format(totp))
alljs = (reqph.json())
brnconz = len(alljs['content'])
for sel in range(0, brnconz):
#print(alljs['content'][sel]['name'])
namsl = (alljs['content'][sel]['name'])
namcd = (alljs['content'][sel]['key'])
namsq = namsl.replace(' ', '-')
mamsq = namsq.lower
print(namcd)
print(mamsq())
try:
logurl = (alljs['content'][sel]['logoUrl'])
print(logurl)
response = requests.get(logurl, stream=True)
print(response.headers)
print(response.history)
#print(response.)
with open ('/home/{}/zalando/posts/'.format(myusr) + fulyr + '.md', 'w') as oppost:
for toda in imgtoday:
oppost.write(('!' + '[' + toda.strip('.jpg') + '](' + galpath + toda + ')\n\n' + '[' + toda.strip('-reference.jpg') + ']' + '(https://reddit.com/u/' + toda.strip('-reference.jpg') + ')' + '\n\n'))
with open('{}{}.png'.format(zalgal, str(totp)), 'wb') as out_file:
shutil.copyfileobj(response.raw, out_file)
del response
except KeyError:
continue
'''
Out[455]:
In [ ]:
bloglisn
In [ ]:
# First request to the articles endpoint with no query parameters.
zalaart = requests.get('https://api.zalando.com/articles')
In [ ]:
print('hello')
In [56]:
# Decoded JSON body of the articles response.
zaljs = zalaart.json()
In [118]:
len(zaljs['content'])
Out[118]:
In [117]:
# Peek at the 'categoryKeys' field of the first article.
zaljs['content'][0]['categoryKeys']
Out[117]:
In [59]:
# Value of 'totalPages' reported by the articles endpoint.
zalpage = zaljs['totalPages']
In [69]:
zalpage
Out[69]:
In [420]:
# Second request asking for 200 articles in one page.
reqartz = requests.get('https://api.zalando.com/articles?pageSize=200')
In [417]:
artjsz = reqartz.json()
In [422]:
# List of article records iterated by the loop in In [452].
arjsct = artjsz['content']
In [424]:
# Number of articles returned; not read by any later visible cell.
arle = len(arjsct)
In [452]:
# For every article in the fetched page: read its name, first large image URL
# and first unit price, print the lower-cased hyphenated brand name, make sure
# the brand's post file exists, and rewrite the brand's .meta file.
for arj in arjsct:
    artnam = arj['name']
    first_image = arj['media']['images'][0]
    artimg = first_image['largeUrl']
    first_unit = arj['units'][0]
    artpric = first_unit['price']['value']

    brand_info = arj['brand']
    nacd = brand_info['key']
    branam = brand_info['name']
    namsq = '-'.join(branam.split(' '))
    mamsq = namsq.lower()
    print(mamsq)

    md_path = '/home/{}/zalando/posts/{}.md'.format(myusr, nacd)
    meta_path = '/home/{}/zalando/posts/{}.meta'.format(myusr, nacd)
    meta_text = '.. title: {}\n.. slug: {}\n.. date: {}\n.. tags: \n.. link:\n.. description:\n.. type: text'.format(nacd, nacd, curtim)

    # The post file is opened in append mode but nothing is written to it, so
    # existing content is kept; the .meta file is overwritten each time.
    with open(md_path, 'a') as oppost, open(meta_path, 'w') as opmetat:
        opmetat.write(meta_text)
In [453]:
'''
zaart = requests.get('https://api.zalando.com/articles?pageSize=200')
zajs = zaart.json()
print(zal)
artnam = (zajs['content'][zal]['name'])
artimg = (zajs['content'][zal]['media']['images'][0]['largeUrl'])
artpric = (zajs['content'][zal]['units'][0]['price']['value'])
print(zajs['content'][zal]['brand'])
nacd = (zajs['content'][zal]['brand']['key'])
print(zajs['content'][zal]['brand']['name'])
with open ('/home/{}/zalando/posts/{}.md'.format(myusr, nacd), 'a') as oppost:
#oppost.write(''.format(artnam, str(artpric), nacd, artimg))
#oppost.write(('\n\n' + mamsq() + '\n\n' + depa.text + '\n\n' + desp.text))
with open ('/home/{}/zalando/posts/{}.meta'.format(myusr, nacd), 'w') as opmetat:
#opmetat.write("{}".format(str(curtim))
opmetat.write('.. title: {}\n.. slug: {}\n.. date: {}\n.. tags: \n.. link:\n.. description:\n.. type: text'.format(namcd, namcd, curtim))
'''
Out[453]:
In [ ]:
In [454]:
'''
for zal in range(0, zalpage):
zaart = requests.get('https://api.zalando.com/articles')
zajs = zaart.json()
print(zal)
artnam = (zajs['content'][zal]['name'])
artimg = (zajs['content'][zal]['media']['images'][0]['largeUrl'])
artpric = (zajs['content'][zal]['units'][0]['price']['value'])
print(zajs['content'][zal]['brand'])
nacd = (zajs['content'][zal]['brand']['key'])
print(zajs['content'][zal]['brand']['name'])
with open ('/home/{}/zalando/posts/{}.md'.format(myusr, nacd), 'a') as oppost:
#oppost.write(''.format(artnam, str(artpric), nacd, artimg))
#oppost.write(('\n\n' + mamsq() + '\n\n' + depa.text + '\n\n' + desp.text))
#try:
# stufav = (zajs['content'][zal]['available'])
# if stufav == True:
# print('yeah in stock')
# elif stufav == False:
# print('not in stock')
#except IndexError:
# continue
#try:
# print(zajs['content'][zal]['available'])
#except KeyError:
# continue
'''
Out[454]:
In [ ]:
Comments
Comments powered by Disqus