pin nikola

Pin Nikola

pinterest rss feed board parser. parse and retrieve url saving and creating data as json object. builds a Nikola static site with the data.

In [1]:
import requests
#from flask import Flask, jsonify, abort
import pickle
#import pinata
#from flask_restful import Resource, Api
import getpass
import xmltodict
import json
import arrow
import os
import bs4
import PIL
import shutil
#from urlparse import urlparse
#from PIL import Image
In [2]:
timnow = arrow.now()
In [3]:
myusr = getpass.getuser()
In [4]:
#facereq = requests.get('https://api.zalando.com/facets')
In [5]:
#facejs = facereq.json()
In [6]:
#flen = len(facejs)
In [7]:
#for fle in range(0, flen):
#    print(facejs[fle]['filter'])
#    
#    face = (facejs[fle]['facets'])
#    
#    facelen = len(face)
#    
#    for fac in range(0, facelen):
##        print(face[fac]['displayName'])
 #       
#        print(face[fac]['key'])
In [8]:
reqhm = requests.get('https://pinterest.com/artctrl/feed.rss/')
In [9]:
xmlpin = xmltodict.parse(reqhm.text)
In [10]:
itlen = len(xmlpin['rss']['channel']['item'])
In [11]:
itlen
Out[11]:
25
In [12]:
with open('/home/{}/womenfashion.pickle'.format(myusr), 'rb') as handle:
    womendic = pickle.load(handle)

print (womendic)
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
<ipython-input-12-c427fe5ca178> in <module>()
----> 1 with open('/home/{}/womenfashion.pickle'.format(myusr), 'rb') as handle:
      2     womendic = pickle.load(handle)
      3 
      4 print (womendic)

FileNotFoundError: [Errno 2] No such file or directory: '/home/wcm/womenfashion.pickle'
In [13]:
len(womendic)
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-13-431d9f41ab80> in <module>()
----> 1 len(womendic)

NameError: name 'womendic' is not defined
In [14]:
dapatz = ('/home/{}/pin/'.format(myusr))
In [15]:
mylis = list()
In [16]:
startdic = dict()
In [17]:
for itl in range(0, itlen):
    print(json.dumps(xmlpin['rss']['channel']['item'][itl]['title']))
    pinlink = (json.dumps(xmlpin['rss']['channel']['item'][itl]['link']))
    pids =(pinlink.replace('https://www.pinterest.com/pin/', ''))
    pinid = pids.replace('/', '')
    
    pinnor = (pinid.replace('"', ''))
    print(pinnor)
    
    pindesc = (json.dumps(xmlpin['rss']['channel']['item'][itl]['description']))
    pinbs = bs4.BeautifulSoup(pindesc)
    pinfin = (pinbs.find('img')['src'])
    pinrep = (pinfin.replace('\\', ''))
    prep = pinrep.replace('"', '')
    print(prep)
    
        
        
    rstme = ('.. title: ' + pinnor + ' \n' + '.. slug: ' + pinnor + ' \n' + '.. date: ' + str(timnow))
    
    #print(pinbs.text)
    #print(pinbs)
    finpinurl = (pinlink.replace('"', ''))
    
    print(finpinurl)
    reqfinpin = requests.get(finpinurl)
    reqfintxt = reqfinpin.text
    reqsoup = bs4.BeautifulSoup(reqfintxt)
    newli = list()
    reqa = reqsoup.find_all('meta')
    #print(reqa)
    urlmet = (reqa[30])
    
    print(urlmet['content'])
    
    womendic.update({pinnor : dict({'id' : pinnor, 'image' : prep, 'url' : urlmet['content'], 'pinurl' : finpinurl})})
    startdic.update({pinnor : dict({'id' : pinnor, 'image' : prep, 'url' : urlmet['content'], 'pinurl' : finpinurl})})
    #for req in reqa:
    #    print(req)
        #if 'http' in req:
        #    print(req)
        
    #for link in reqsoup.find_all('a'):
    #    try:
    #        if 'http' in (link.get('href')):
    #            print(link.get('href'))
    #            with open('/home/{}/women-fashion/posts/{}.md'.format(myusr, pinnor), 'a') as fashmd:
    #                fashmd.write('[' + link.get('href') + '](' + itl + ')\n\n')
    #    except TypeError:
    #        pass
        #print(link.get('href'))
    #print(reqa)
    mylis = list()
    newli.append(reqsoup)
    mylis.append(reqsoup.find_all('meta'))
    
    with open('/home/{}/women-fashion/posts/{}.meta'.format(myusr, pinnor), 'w') as fashmd:
        fashmd.write(rstme)
        
    
    
    with open('/home/{}/women-fashion/posts/{}.md'.format(myusr, pinnor), 'w') as fashmd:
        fashmd.write('!' + '[' + pinnor + '](/galleries/' + pinnor + '.jpg)\n\n' + pinbs.text + '\n\n[link](' + urlmet['content'] + ')\n\n')
        
        
    response = requests.get(prep, stream=True)
    with open('{}{}.jpg'.format(dapatz + 'galleries/', pinnor), 'wb') as out_file:
        shutil.copyfileobj(response.raw, out_file)
        del response
    #print(json.dumps(xmlpin['rss']['channel']['item'][itl]['link']))
    
"Raspberry Pi Superco"
751678994027877238
https://i.pinimg.com/236x/cb/39/4f/cb394f98b320a5910868dce7b1b0b4e3--raspberry-pi-computer-engineers.jpg
https://www.pinterest.com/pin/751678994027877238/
/usr/lib/python3/dist-packages/bs4/__init__.py:181: UserWarning: No parser was explicitly specified, so I'm using the best available HTML parser for this system ("lxml"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.

The code that caused this warning is on line 193 of the file /usr/lib/python3.5/runpy.py. To get rid of this warning, change code that looks like this:

 BeautifulSoup([your markup])

to this:

 BeautifulSoup([your markup], "lxml")

  markup_type=markup_type))
0
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-17-1d320034f431> in <module>()
     34     print(urlmet['content'])
     35 
---> 36     womendic.update({pinnor : dict({'id' : pinnor, 'image' : prep, 'url' : urlmet['content'], 'pinurl' : finpinurl})})
     37     startdic.update({pinnor : dict({'id' : pinnor, 'image' : prep, 'url' : urlmet['content'], 'pinurl' : finpinurl})})
     38     #for req in reqa:

NameError: name 'womendic' is not defined
In [40]:
with open('/home/{}/womenfashion.pickle'.format(myusr), 'wb') as handle:
    pickle.dump(womendic, handle, protocol=pickle.HIGHEST_PROTOCOL)
In [41]:
#len(womendic)
In [42]:
#jsdump = json.dumps(startdic)
In [43]:
#jsdump
In [319]:
#lispin = os.listdir('{}'.format(dapatz))
In [320]:
#heighsiz = list()
#widthsiz = list()
In [ ]:
 
In [518]:
#for lisp in lispin:
#    im = Image.open(dapatz + lisp)
#    print(im.size[1])
#    heighsiz.append(im.size[1])
#    widthsiz.append(im.size[0])
#    print(im.filename)
#    print(im.mode)
In [519]:
#widthsiz
In [520]:
#max(widthsiz)
In [521]:
#im = Image.new("RGB", (max(widthsiz), sum(heighsiz)), "white")
In [ ]:
 
In [325]:
#im.size
Out[325]:
(236, 14066)
In [326]:
#sum(heighsiz)
Out[326]:
14066
In [327]:
#posize = 0
In [328]:
#testlis = list()
In [ ]:
 
In [329]:
#for lisp in lispin:
#    imz = Image.open(dapatz + lisp)
    #print(im.size[1])
    #heighsiz.append(im.size[1])
    #print(posize)
#    pozi = (posize + (im.size[1]))
#    testlis.append(im.size[1])
#    print(sum(testlis))
#    im.paste(imz, (0, sum(testlis)))
    #img('home/wcm/pintrest/' + lisp)
    #print(pozi)
    #print(im.filename)
    #print(im.mode)
14066
28132
42198
56264
70330
84396
98462
112528
126594
140660
154726
168792
182858
196924
210990
225056
239122
253188
267254
281320
295386
309452
323518
337584
351650
365716
379782
393848
407914
421980
436046
450112
In [330]:
#im.save('/home/wcm/Downloads/pin.jpg')
In [76]:
#json.dumps(xmlpin['rss']['channel']['item'][0])
Out[76]:
'{"title": "\\"The Far Side\\" by Ga", "link": "https://www.pinterest.com/pin/751678994025884711/", "description": "<p><a href=\\"/pin/751678994025884711/\\"><img src=\\"https://s-media-cache-ak0.pinimg.com/236x/5d/ce/b1/5dceb19f02f223b25788847e05dea867.jpg\\"></a></p><p>\\"The Far Side\\" by Gary Larson.</p>", "pubDate": "Mon, 23 Jan 2017 14:31:39 +0000", "guid": "https://www.pinterest.com/pin/751678994025884711/"}'
In [525]:
#https://nz.pinterest.com/search/pins/?q=kumera%20recipes
In [536]:
#searchspace = requests.get('https://nz.pinterest.com/search/pins/?q=kumera%20recipes')
In [537]:
#searchspace = requests.get('https://nz.pinterest.com/search/pins/?q=space%20fashion&rs=typed&term_meta[]=space%7Ctyped&term_meta[]=fashion%7Ctyped')
In [538]:
#bsearch =  bs4.BeautifulSoup(searchspace.text)
In [44]:
#for bsa in bsearch.find_all('a'):
    #if '/pin/' in bsa:
#    for pi in bsa:
#        print(pi)
        #if '/pin/' in pi:
        #    print(pi)
    
In [45]:
#for mybst in bsearch.find_all('img'):
#    if '236x' in (mybst['src']):
#        print(mybst)
#        print(mybst['alt'])
#        print((mybst['src']))
In [ ]:
 

Comments

Comments powered by Disqus