Pin Nikola
A Pinterest RSS board feed parser: it fetches the feed, retrieves each pin's id, image and source URL, saves the data as a JSON-style record, and builds a Nikola static site from it.
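In outline: fetch the board's RSS feed, turn each item into a small record (id, image, source URL, pin URL), write a Nikola post and gallery image per pin, and then build the site. A condensed sketch of the feed step, using the same feed URL as the cells below:

import requests
import xmltodict

# fetch the board feed and parse the XML into nested dicts
feed = requests.get('https://pinterest.com/artctrl/feed.rss/')
parsed = xmltodict.parse(feed.text)
items = parsed['rss']['channel']['item']

# each item carries the pin title, its link and an HTML description
for item in items:
    print(item['title'], item['link'])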
In [1]:
import requests
#from flask import Flask, jsonify, abort
import pickle
#import pinata
#from flask_restful import Resource, Api
import getpass
import xmltodict
import json
import arrow
import os
import bs4
import PIL
import shutil
#from urlparse import urlparse
#from PIL import Image
In [2]:
timnow = arrow.now()
In [3]:
myusr = getpass.getuser()
In [4]:
#facereq = requests.get('https://api.zalando.com/facets')
In [5]:
#facejs = facereq.json()
In [6]:
#flen = len(facejs)
In [7]:
#for fle in range(0, flen):
#    print(facejs[fle]['filter'])
#
#    face = facejs[fle]['facets']
#
#    facelen = len(face)
#
#    for fac in range(0, facelen):
#        #print(face[fac]['displayName'])
#
#        print(face[fac]['key'])
In [8]:
reqhm = requests.get('https://pinterest.com/artctrl/feed.rss/')
In [9]:
xmlpin = xmltodict.parse(reqhm.text)
In [10]:
itlen = len(xmlpin['rss']['channel']['item'])
In [11]:
itlen
Out[11]:
In [12]:
with open('/home/{}/womenfashion.pickle'.format(myusr), 'rb') as handle:
    womendic = pickle.load(handle)
print(womendic)
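The cell above assumes womenfashion.pickle already exists from an earlier run; on a fresh machine the open() raises FileNotFoundError. A more forgiving load might look like this (a sketch, not in the original notebook):

import os
import pickle
import getpass

myusr = getpass.getuser()
pickle_path = '/home/{}/womenfashion.pickle'.format(myusr)

# start from an empty dict when no previous run has written the pickle yet
if os.path.exists(pickle_path):
    with open(pickle_path, 'rb') as handle:
        womendic = pickle.load(handle)
else:
    womendic = dict()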
In [13]:
len(womendic)
In [14]:
dapatz = '/home/{}/pin/'.format(myusr)
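The download and post-writing steps further down assume that the pin/galleries/ folder and the women-fashion/posts/ folder already exist; a small guard like this (hypothetical, not in the original) would create them up front:

import os

os.makedirs(dapatz + 'galleries/', exist_ok=True)
os.makedirs('/home/{}/women-fashion/posts/'.format(myusr), exist_ok=True)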
In [15]:
mylis = list()
In [16]:
startdic = dict()
In [17]:
for itl in range(0, itlen):
    item = xmlpin['rss']['channel']['item'][itl]
    print(item['title'])

    # the pin id is the numeric part of the pin URL
    finpinurl = item['link']
    pinnor = finpinurl.replace('https://www.pinterest.com/pin/', '').replace('/', '')
    print(pinnor)
    print(finpinurl)

    # the RSS description is a block of HTML; pull the thumbnail image URL out of it
    pinbs = bs4.BeautifulSoup(item['description'], 'html.parser')
    prep = pinbs.find('img')['src']
    print(prep)

    # Nikola metadata for the post that will be written for this pin
    rstme = ('.. title: ' + pinnor + ' \n' + '.. slug: ' + pinnor + ' \n' + '.. date: ' + str(timnow))
    #print(pinbs.text)
    #print(pinbs)

    # fetch the pin page itself and grab its meta tags; index 30 is the tag
    # whose content holds the URL stored for this pin
    reqfinpin = requests.get(finpinurl)
    reqsoup = bs4.BeautifulSoup(reqfinpin.text, 'html.parser')
    newli = list()
    reqa = reqsoup.find_all('meta')
    #print(reqa)
    urlmet = reqa[30]
    print(urlmet['content'])

    # record the pin in both the cumulative dict and this run's dict
    womendic.update({pinnor: {'id': pinnor, 'image': prep, 'url': urlmet['content'], 'pinurl': finpinurl}})
    startdic.update({pinnor: {'id': pinnor, 'image': prep, 'url': urlmet['content'], 'pinurl': finpinurl}})

    #for req in reqa:
    #    print(req)
    #    if 'http' in req:
    #        print(req)
    #for link in reqsoup.find_all('a'):
    #    try:
    #        if 'http' in (link.get('href')):
    #            print(link.get('href'))
    #            with open('/home/{}/women-fashion/posts/{}.md'.format(myusr, pinnor), 'a') as fashmd:
    #                fashmd.write('[' + link.get('href') + '](' + itl + ')\n\n')
    #    except TypeError:
    #        pass
    #print(link.get('href'))
    #print(reqa)

    newli.append(reqsoup)
    mylis.append(reqsoup.find_all('meta'))

    # write the Nikola .meta and .md files for this pin
    with open('/home/{}/women-fashion/posts/{}.meta'.format(myusr, pinnor), 'w') as fashmd:
        fashmd.write(rstme)
    with open('/home/{}/women-fashion/posts/{}.md'.format(myusr, pinnor), 'w') as fashmd:
        fashmd.write('![' + pinnor + '](/galleries/' + pinnor + '.jpg)\n\n' + pinbs.text + '\n\n[link](' + urlmet['content'] + ')\n\n')

    # download the thumbnail into the galleries directory
    response = requests.get(prep, stream=True)
    with open('{}{}.jpg'.format(dapatz + 'galleries/', pinnor), 'wb') as out_file:
        shutil.copyfileobj(response.raw, out_file)
    del response
    #print(json.dumps(xmlpin['rss']['channel']['item'][itl]['link']))
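The loop picks the pin page's source URL by position (reqa[30]), which breaks as soon as Pinterest reorders its meta tags. A sketch of a lookup by attribute instead; the property names here are guesses, not verified against the actual pin page markup:

import bs4

def source_url_from_pin_page(html, fallback=None):
    # look the URL up by meta attributes rather than a fixed index into find_all('meta')
    soup = bs4.BeautifulSoup(html, 'html.parser')
    for prop in ('og:see_also', 'og:url', 'twitter:url'):
        tag = soup.find('meta', attrs={'property': prop})
        if tag and tag.get('content'):
            return tag['content']
    canon = soup.find('link', rel='canonical')
    if canon and canon.get('href'):
        return canon['href']
    return fallback

Once the .meta files, .md posts and gallery images are in place, the site itself is produced by Nikola's own nikola build command run inside the women-fashion site directory.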
In [40]:
with open('/home/{}/womenfashion.pickle'.format(myusr), 'wb') as handle:
    pickle.dump(womendic, handle, protocol=pickle.HIGHEST_PROTOCOL)
In [41]:
#len(womendic)
In [42]:
#jsdump = json.dumps(startdic)
In [43]:
#jsdump
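The commented-out cells above gesture at the JSON step from the intro. A minimal version that writes this run's pins (startdic from the loop) out to disk; the womenfashion.json path is just a placeholder:

import json

with open('/home/{}/womenfashion.json'.format(myusr), 'w') as jsonfile:
    json.dump(startdic, jsonfile, indent=2)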
In [319]:
#lispin = os.listdir('{}'.format(dapatz))
In [320]:
#heighsiz = list()
#widthsiz = list()
In [518]:
#for lisp in lispin:
# im = Image.open(dapatz + lisp)
# print(im.size[1])
# heighsiz.append(im.size[1])
# widthsiz.append(im.size[0])
# print(im.filename)
# print(im.mode)
In [519]:
#widthsiz
In [520]:
#max(widthsiz)
In [521]:
#im = Image.new("RGB", (max(widthsiz), sum(heighsiz)), "white")
In [325]:
#im.size
Out[325]:
In [326]:
#sum(heighsiz)
Out[326]:
In [327]:
#posize = 0
In [328]:
#testlis = list()
In [329]:
#for lisp in lispin:
# imz = Image.open(dapatz + lisp)
#print(im.size[1])
#heighsiz.append(im.size[1])
#print(posize)
# pozi = (posize + (im.size[1]))
# testlis.append(im.size[1])
# print(sum(testlis))
# im.paste(imz, (0, sum(testlis)))
#img('home/wcm/pintrest/' + lisp)
#print(pozi)
#print(im.filename)
#print(im.mode)
In [330]:
#im.save('/home/wcm/Downloads/pin.jpg')
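The commented-out cells above build one tall contact sheet from every downloaded image. A runnable sketch of the same idea, reusing the notebook's dapatz and myusr (the output filename is made up), pasting each image at a running vertical offset:

import os
from PIL import Image

gallery = dapatz + 'galleries/'
files = [f for f in os.listdir(gallery) if f.endswith('.jpg')]
images = [Image.open(os.path.join(gallery, f)) for f in files]

if images:
    width = max(im.size[0] for im in images)
    height = sum(im.size[1] for im in images)
    sheet = Image.new('RGB', (width, height), 'white')

    offset = 0
    for im in images:
        # paste each image directly below the previous one
        sheet.paste(im, (0, offset))
        offset += im.size[1]

    sheet.save('/home/{}/pin/contact-sheet.jpg'.format(myusr))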
In [76]:
#json.dumps(xmlpin['rss']['channel']['item'][0])
Out[76]:
In [525]:
#https://nz.pinterest.com/search/pins/?q=kumera%20recipes
In [536]:
#searchspace = requests.get('https://nz.pinterest.com/search/pins/?q=kumera%20recipes')
In [537]:
#searchspace = requests.get('https://nz.pinterest.com/search/pins/?q=space%20fashion&rs=typed&term_meta[]=space%7Ctyped&term_meta[]=fashion%7Ctyped')
In [538]:
#bsearch = bs4.BeautifulSoup(searchspace.text)
In [44]:
#for bsa in bsearch.find_all('a'):
#if '/pin/' in bsa:
# for pi in bsa:
# print(pi)
#if '/pin/' in pi:
# print(pi)
In [45]:
#for mybst in bsearch.find_all('img'):
# if '236x' in (mybst['src']):
# print(mybst)
# print(mybst['alt'])
# print((mybst['src']))
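The last commented cells probe a Pinterest search results page for thumbnail img tags whose src contains '236x'. A sketch of that scrape; whether it returns anything depends on what markup Pinterest serves to a plain requests client, since the page may well be rendered client-side:

import requests
import bs4

search = requests.get('https://nz.pinterest.com/search/pins/?q=space%20fashion')
soup = bs4.BeautifulSoup(search.text, 'html.parser')

# the 236px-wide thumbnails have '236x' in their image URLs
for img in soup.find_all('img'):
    src = img.get('src', '')
    if '236x' in src:
        print(img.get('alt'), src)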