- now checking images against the Google Vision API (why Google, despite my dislike of Google, the company? Because the Vision API is the only one that is simple enough to use, and its labelling is reasonable).

- checking text against the Google Natural Language API: the strict classification it offers is better than a free folksonomy if I ever want to connect entries based on topic
unfortunately it doesn't support Hungarian yet.
This commit is contained in:
Peter Molnar 2018-12-11 14:06:18 +00:00
parent 033a00db8e
commit 26c6ef77ed
7 changed files with 150 additions and 36 deletions

1
.gitignore vendored
View file

@ -5,4 +5,3 @@ keys.py
lib
gcloud.json
tests/.Exif.tests.jpg.json
Pipfile.lock

View file

@ -10,6 +10,7 @@ import json
import os
import keys
import requests
import logging
from pprint import pprint
@ -58,6 +59,38 @@ class CachedMeta(dict):
for k, v in data.items():
self[k] = v
class GoogleClassifyText(CachedMeta):
    """Topic classification of a text via the Google Natural Language API.

    The instance is a dict that maps each category name returned by the
    ``classifyText`` endpoint to the confidence reported for it.

    Args:
        fpath: path of the entry the text belongs to; used in log messages
            (and presumably by ``CachedMeta._read`` to locate the cache --
            that method is not visible here, confirm against the base class).
        txt: the plain text to classify.
        lang: language code sent along with the document, ``'en'`` by default.
    """

    def __init__(self, fpath, txt, lang='en'):
        self.fpath = fpath
        self.txt = txt
        self.lang = lang
        # _read() comes from CachedMeta.
        self._read()

    def _call_tool(self):
        """Query the classifyText endpoint and store the categories on self.

        Failures while decoding or walking the response are logged, not
        raised, so a single bad answer does not abort the caller.
        """
        params = {
            "document": {
                "type": "PLAIN_TEXT",
                "content": self.txt,
                "language": self.lang,
            }
        }
        url = "https://language.googleapis.com/v1beta2/documents:classifyText?key=%s" % (
            keys.gcloud.get('key')
        )
        logging.info('calling Google classifyText for %s', self.fpath)
        r = requests.post(url, json=params)
        try:
            resp = r.json()
            # An error payload (e.g. unsupported language) carries no
            # 'categories'; report it instead of silently storing nothing.
            if resp.get('error'):
                logging.error(
                    'Google classifyText returned an error for %s: %s',
                    self.fpath,
                    resp.get('error')
                )
            for cat in resp.get('categories', []):
                self[cat.get('name')] = cat.get('confidence')
        except Exception as e:
            logging.error(
                'failed to call Google classifyText on: %s, reason: %s',
                self.fpath,
                e
            )
class GoogleVision(CachedMeta):
def __init__(self, fpath, imgurl):
self.fpath = fpath
@ -65,49 +98,84 @@ class GoogleVision(CachedMeta):
self._read()
@property
def cntr(self):
    """Increment the on-disk Vision API call counter and return the new value.

    Reading this property has a side effect: the integer stored in
    /tmp/visionapicallcounter is increased by one (counting from zero when
    the file does not exist yet) and written back to the same file.
    """
    counter_file = '/tmp/visionapicallcounter'
    previous = 0
    if os.path.exists(counter_file):
        with open(counter_file, 'rt') as src:
            previous = int(src.read())
    updated = previous + 1
    with open(counter_file, 'wt') as dst:
        dst.write("%d" % updated)
    return updated
@property
def response(self):
    """Return the first entry of the stored ``responses`` list.

    Exposed as a property because the sibling accessors read it as an
    attribute (``'labelAnnotations' in self.response``).

    Returns:
        The first response dict, or an empty dict when nothing is stored,
        the list is empty or missing, or the first entry has no
        ``labelAnnotations``.
    """
    responses = self.get('responses')
    if not responses:
        return {}
    first = responses[0]
    if 'labelAnnotations' not in first:
        return {}
    return first
@property
def tags(self):
    """Collect the textual labels found in the Vision response.

    Returns:
        A list with the ``description`` of every label annotation followed
        by the ``description`` of every web entity. Entries without a
        description are skipped instead of raising ``KeyError``.
    """
    resp = self.response
    entries = list(resp.get('labelAnnotations', []))
    entries.extend(resp.get('webDetection', {}).get('webEntities', []))
    return [
        entry['description']
        for entry in entries
        if entry.get('description')
    ]
@property
def landmark(self):
    """Return the last landmark annotation of the Vision response.

    The annotation is read without removing it from the stored response,
    so repeated reads return the same landmark and the data stays intact.

    Returns:
        ``None`` when no landmark annotation is present, otherwise a dict
        with ``name``, ``latitude`` and ``longitude`` taken from the
        annotation's description and its first location.
    """
    annotations = self.response.get('landmarkAnnotations')
    if not annotations:
        return None
    # Same element the original .pop() picked, but read non-destructively.
    match = annotations[-1]
    position = match['locations'][0]['latLng']
    return {
        'name': match['description'],
        'latitude': position['latitude'],
        'longitude': position['longitude']
    }
@property
def onlinecopies(self):
    """List the URLs of pages that contain a matching copy of the image.

    Returns:
        The ``url`` of every entry under
        ``webDetection.pagesWithMatchingImages`` in the Vision response, or
        an empty list when either level is missing.
    """
    resp = self.response
    if 'webDetection' not in resp:
        return []
    web = resp['webDetection']
    if 'pagesWithMatchingImages' not in web:
        return []
    return [page['url'] for page in web['pagesWithMatchingImages']]
def _call_tool(self):
    """Ask the Google Vision API to annotate ``self.imgurl`` and store the answer.

    Landmark, web and label detection are requested for the image. The
    decoded JSON body is copied onto self key by key, so the ``responses``
    list that the ``response`` accessor reads is kept as returned.

    Raises:
        ValueError: when the call counter has reached 500. Note that
            reading ``self.cntr`` increments the on-disk counter.
    """
    if self.cntr >= 500:
        raise ValueError('already at 500 requests!')

    params = {
        "requests": [{
            "image": {"source": {"imageUri": self.imgurl}},
            "features": [
                {
                    "type": "LANDMARK_DETECTION",
                },
                {
                    "type": "WEB_DETECTION",
                },
                {
                    "type": "LABEL_DETECTION",
                }
            ]
        }]
    }

    url = "https://vision.googleapis.com/v1/images:annotate?key=%s" % (
        keys.gcloud.get('key')
    )
    logging.info('calling Google Vision API for %s', self.fpath)
    r = requests.post(url, json=params)
    try:
        resp = r.json()
        # Keep the top-level structure: the accessors on this class look
        # the data up under self['responses'][0].
        for k, v in resp.items():
            self[k] = v
    except Exception as e:
        logging.error(
            'failed to call Google Vision API on: %s, reason: %s',
            self.fpath,
            e
        )
class Exif(CachedMeta):
def __init__(self, fpath):

26
nasg.py
View file

@ -30,7 +30,7 @@ from emoji import UNICODE_EMOJI
from slugify import slugify
import requests
from pandoc import Pandoc
from exiftool import Exif, GoogleVision
from meta import Exif, GoogleVision, GoogleClassifyText
import settings
import keys
@ -506,6 +506,16 @@ class Singular(MarkdownDoc):
pass
return lang
@property
def classification(self):
    """Return the topic classification of this entry's content.

    Builds a ``GoogleClassifyText`` from the entry's path, content and
    language. Photo entries are always given the photography category;
    when it is not already present it is added with a confidence of 1.0,
    stored as a number like the confidences that come from the API.

    Returns:
        The ``GoogleClassifyText`` mapping of category name to confidence.
    """
    c = GoogleClassifyText(self.fpath, self.content, self.lang)
    k = '/Arts & Entertainment/Visual Art & Design/Photographic & Digital Arts'
    if self.is_photo and k not in c:
        c[k] = 1.0
    return c
@property
def url(self):
return "%s/%s/" % (
@ -578,6 +588,7 @@ class Singular(MarkdownDoc):
'review': self.review,
'has_code': self.has_code,
'event': self.event,
'classification': self.classification.keys()
}
if (self.is_photo):
v.update({
@ -707,7 +718,8 @@ class WebImage(object):
'caption': self.caption,
'exif': self.exif,
'is_photo': self.is_photo,
'is_mainimg': self.is_mainimg
'is_mainimg': self.is_mainimg,
'onlinecopies': self.onlinecopies
}
def __str__(self):
@ -717,9 +729,17 @@ class WebImage(object):
return tmpl.render(self.tmplvars)
@cached_property
def visionapi(self):
    """Return the Google Vision data for this image.

    Builds a ``GoogleVision`` from the image's file path and source URL.
    The name ``visionapi`` is the one the ``onlinecopies`` property reads.
    """
    return GoogleVision(self.fpath, self.src)
@property
def onlinecopies(self):
    """Return the matching-page URLs that are not on this site's own domain.

    Each URL reported by ``self.visionapi.onlinecopies`` is kept unless the
    configured site domain occurs in it. A dict serves as an ordered set,
    so duplicates collapse and the result is its keys view.
    """
    own_domain = settings.site.get('domain')
    foreign = {
        url: True
        for url in self.visionapi.onlinecopies
        if own_domain not in url
    }
    return foreign.keys()
# Metadata of the image file: an Exif object built from self.fpath.
# Decorated with cached_property, so presumably the object is created on
# first access and reused afterwards -- confirm against the decorator's import.
@cached_property
def meta(self):
return Exif(self.fpath)

View file

@ -60,6 +60,7 @@ author = {
'github': 'https://github.com/petermolnar',
'instagram': 'https://www.instagram.com/petermolnarnet/',
'twitter': 'https://twitter.com/petermolnar',
'micro.blog': 'https://micro.blog/petermolnar',
}
}

View file

@ -53,6 +53,13 @@
{{ exif.lens }}
</dd>
</dl>
{% endif %}
{% if onlinecopies|length > 1 %}
<ul>
{% for copy in onlinecopies %}
<li><a href="{{ copy }}">[{{ loop.index }}]</a></li>
{% endfor %}
</ul>
{% endif %}
</figcaption>
</figure>

View file

@ -218,6 +218,15 @@
</a>
</dd>
<dt>Classification</dt>
<dd>
<ul>
{% for c in post.classification %}
<li>{{ c }}</li>
{% endfor %}
</ul>
</dd>
<dt>License</dt>
<dd class="license">
{% if post.licence == 'CC-BY-4.0' %}

View file

@ -133,12 +133,22 @@ input {
border-bottom: 3px solid #ccc;
}
figcaption > ul,
nav ul {
list-style-type: none;
margin: 0;
padding: 0;
}
figcaption > ul {
display:none;
text-align: right;
}
figcaption ul li {
display: inline-block;
}
nav li {
display: inline-block;
padding: 0 0.6em 0 0;