- now checking images against the Google Vision API (why Google, despite my dislike of Google, the company? The Vision API is the only one that is simple enough to use, and its labelling is reasonable).

- checking text against the Google Natural Language API: the strict classification it offers is better than a free folksonomy if I ever want to connect entries based on topic.
Unfortunately, they don't support Hungarian yet.
This commit is contained in:
Peter Molnar 2018-12-11 14:06:18 +00:00
parent 033a00db8e
commit 26c6ef77ed
7 changed files with 150 additions and 36 deletions

1
.gitignore vendored
View file

@ -5,4 +5,3 @@ keys.py
lib lib
gcloud.json gcloud.json
tests/.Exif.tests.jpg.json tests/.Exif.tests.jpg.json
Pipfile.lock

View file

@ -10,6 +10,7 @@ import json
import os import os
import keys import keys
import requests import requests
import logging
from pprint import pprint from pprint import pprint
@ -58,6 +59,38 @@ class CachedMeta(dict):
for k, v in data.items(): for k, v in data.items():
self[k] = v self[k] = v
class GoogleClassifyText(CachedMeta):
    """Content categories of a text, fetched from Google's classifyText API.

    After a successful call the mapping holds one entry per category
    returned by the API: the category ``name`` as key and its
    ``confidence`` as value.

    NOTE(review): caching is assumed to be handled by ``CachedMeta._read()``,
    which is defined outside this view - confirm there.
    """

    def __init__(self, fpath, txt, lang='en'):
        """Store the inputs and populate the mapping via ``_read()``.

        :param fpath: path of the source entry; used here only in log messages
        :param txt: plain text sent to the API for classification
        :param lang: language code sent along with the document
        """
        self.fpath = fpath
        self.txt = txt
        self.lang = lang
        self._read()

    def _call_tool(self):
        """POST the text to ``documents:classifyText`` and store the result.

        Parse failures and API error payloads are logged and leave the
        mapping unchanged; they are never raised to the caller.
        """
        params = {
            "document": {
                "type": "PLAIN_TEXT",
                "content": self.txt,
                "language": self.lang,
            }
        }
        url = "https://language.googleapis.com/v1beta2/documents:classifyText?key=%s" % (
            keys.gcloud.get('key')
        )
        logging.info('calling Google classifyText for %s', self.fpath)
        r = requests.post(url, json=params)
        try:
            resp = r.json()
            if 'error' in resp:
                # A rejected request (e.g. an unsupported language) carries an
                # "error" object instead of "categories"; report it rather
                # than silently treating it as "no categories".
                logging.error(
                    'Google classifyText returned an error for: %s, reason: %s',
                    self.fpath,
                    resp['error']
                )
                return
            for cat in resp.get('categories', []):
                self[cat.get('name')] = cat.get('confidence')
        except Exception as e:
            logging.error(
                'failed to call Google classifyText on: %s, reason: %s',
                self.fpath,
                e
            )
class GoogleVision(CachedMeta): class GoogleVision(CachedMeta):
def __init__(self, fpath, imgurl): def __init__(self, fpath, imgurl):
self.fpath = fpath self.fpath = fpath
@ -65,49 +98,84 @@ class GoogleVision(CachedMeta):
self._read() self._read()
@property @property
def cntr(self): def response(self):
curr = 0 if 'responses' not in self:
if os.path.exists('/tmp/visionapicallcounter'): return {}
with open('/tmp/visionapicallcounter', 'rt') as f: if not len(self['responses']):
curr = int(f.read()) return {}
curr = curr + 1 if 'labelAnnotations' not in self['responses'][0]:
with open('/tmp/visionapicallcounter', 'wt') as f: return {}
f.write("%d" % curr) return self['responses'][0]
return curr
@property
def tags(self):
    """Return the descriptions of all labels and web entities in the response.

    Label annotations come first, followed by the web entities found under
    ``webDetection``. Entries that carry no ``description`` are skipped
    instead of raising ``KeyError``.

    :return: list of description strings, possibly empty
    """
    response = self.response
    candidates = list(response.get('labelAnnotations', []))
    candidates.extend(response.get('webDetection', {}).get('webEntities', []))
    return [
        entry['description']
        for entry in candidates
        if entry.get('description')
    ]
@property
def landmark(self):
    """Return the last landmark annotation of the response, or ``None``.

    The annotation is read without being removed from the response, so
    repeated accesses give the same answer.

    :return: dict with ``name``, ``latitude`` and ``longitude``, or ``None``
        when the response has no landmark annotations
    """
    matches = self.response.get('landmarkAnnotations')
    if not matches:
        return None
    # Index instead of pop(): popping would drop the annotation from the
    # stored response every time this property is read.
    match = matches[-1]
    position = match['locations'][0]['latLng']
    return {
        'name': match['description'],
        'latitude': position['latitude'],
        'longitude': position['longitude']
    }
@property
def onlinecopies(self):
    """Return the URLs of pages listed as containing a matching image.

    :return: list of ``url`` values taken from
        ``webDetection.pagesWithMatchingImages``; empty when either key
        is absent from the response
    """
    response = self.response
    if 'webDetection' not in response:
        return []
    web = response['webDetection']
    if 'pagesWithMatchingImages' not in web:
        return []
    return [page['url'] for page in web['pagesWithMatchingImages']]
def _call_tool(self): def _call_tool(self):
if (self.cntr >= 500 ):
raise ValueError('already at 500 requests!')
params = { params = {
"requests": [ "requests": [{
{ "image": {"source": {"imageUri": self.imgurl}},
"image": { "features": [
"source": { {
"imageUri": self.imgurl, "type": "LANDMARK_DETECTION",
} },
}, {
"features": [ "type": "WEB_DETECTION",
{ },
"type": "LANDMARK_DETECTION", {
}, "type": "LABEL_DETECTION",
{ }
"type": "LABEL_DETECTION", ]
}, }]
]
}
]
} }
url = "https://vision.googleapis.com/v1/images:annotate?key=%s" % (keys.gcloud.get('key')) url = "https://vision.googleapis.com/v1/images:annotate?key=%s" % (
keys.gcloud.get('key')
)
logging.info('calling Google Vision API for %s', self.fpath)
r = requests.post(url, json=params) r = requests.post(url, json=params)
try: try:
resp = r.json() resp = r.json()
resp = resp['responses'][0]
for k, v in resp.items(): for k, v in resp.items():
self[k] = v self[k] = v
except Exception as e: except Exception as e:
logging.error('failed to call Google Vision API on: %s, reason: %s', self.fpath, e) logging.error(
'failed to call Google Vision API on: %s, reason: %s',
self.fpath,
e
)
class Exif(CachedMeta): class Exif(CachedMeta):
def __init__(self, fpath): def __init__(self, fpath):

26
nasg.py
View file

@ -30,7 +30,7 @@ from emoji import UNICODE_EMOJI
from slugify import slugify from slugify import slugify
import requests import requests
from pandoc import Pandoc from pandoc import Pandoc
from exiftool import Exif, GoogleVision from meta import Exif, GoogleVision, GoogleClassifyText
import settings import settings
import keys import keys
@ -506,6 +506,16 @@ class Singular(MarkdownDoc):
pass pass
return lang return lang
@property
def classification(self):
    """Return the text classification of this entry.

    Builds a ``GoogleClassifyText`` from the entry's path, content and
    language. For photo entries the photography category is added when the
    classifier did not report it.

    :return: the ``GoogleClassifyText`` mapping of category -> confidence
    """
    c = GoogleClassifyText(self.fpath, self.content, self.lang)
    k = '/Arts & Entertainment/Visual Art & Design/Photographic & Digital Arts'
    if self.is_photo and k not in c:
        # Numeric confidence rather than the string '1.0', so the injected
        # value has the same kind as the confidences parsed from the API's
        # JSON response.
        c.update({k: 1.0})
    return c
@property @property
def url(self): def url(self):
return "%s/%s/" % ( return "%s/%s/" % (
@ -578,6 +588,7 @@ class Singular(MarkdownDoc):
'review': self.review, 'review': self.review,
'has_code': self.has_code, 'has_code': self.has_code,
'event': self.event, 'event': self.event,
'classification': self.classification.keys()
} }
if (self.is_photo): if (self.is_photo):
v.update({ v.update({
@ -707,7 +718,8 @@ class WebImage(object):
'caption': self.caption, 'caption': self.caption,
'exif': self.exif, 'exif': self.exif,
'is_photo': self.is_photo, 'is_photo': self.is_photo,
'is_mainimg': self.is_mainimg 'is_mainimg': self.is_mainimg,
'onlinecopies': self.onlinecopies
} }
def __str__(self): def __str__(self):
@ -717,9 +729,17 @@ class WebImage(object):
return tmpl.render(self.tmplvars) return tmpl.render(self.tmplvars)
@cached_property @cached_property
def vision(self): def visionapi(self):
return GoogleVision(self.fpath, self.src) return GoogleVision(self.fpath, self.src)
@property
def onlinecopies(self):
    """Return the distinct matching-page URLs that are not on this site.

    A URL counts as being on this site when the configured site domain
    occurs anywhere in it. First-seen order is kept.

    :return: a dict keys view of the remaining URLs
    """
    external = dict.fromkeys(
        (
            url
            for url in self.visionapi.onlinecopies
            if settings.site.get('domain') not in url
        ),
        True
    )
    return external.keys()
@cached_property @cached_property
def meta(self): def meta(self):
return Exif(self.fpath) return Exif(self.fpath)

View file

@ -60,6 +60,7 @@ author = {
'github': 'https://github.com/petermolnar', 'github': 'https://github.com/petermolnar',
'instagram': 'https://www.instagram.com/petermolnarnet/', 'instagram': 'https://www.instagram.com/petermolnarnet/',
'twitter': 'https://twitter.com/petermolnar', 'twitter': 'https://twitter.com/petermolnar',
'micro.blog': 'https://micro.blog/petermolnar',
} }
} }

View file

@ -53,6 +53,13 @@
{{ exif.lens }} {{ exif.lens }}
</dd> </dd>
</dl> </dl>
{% endif %}
{% if onlinecopies|length > 1 %}
<ul>
{% for copy in onlinecopies %}
<li><a href="{{ copy }}">[{{ loop.index }}]</a></li>
{% endfor %}
</ul>
{% endif %} {% endif %}
</figcaption> </figcaption>
</figure> </figure>

View file

@ -218,6 +218,15 @@
</a> </a>
</dd> </dd>
<dt>Classification</dt>
<dd>
<ul>
{% for c in post.classification %}
<li>{{ c }}</li>
{% endfor %}
</ul>
</dd>
<dt>License</dt> <dt>License</dt>
<dd class="license"> <dd class="license">
{% if post.licence == 'CC-BY-4.0' %} {% if post.licence == 'CC-BY-4.0' %}

View file

@ -133,12 +133,22 @@ input {
border-bottom: 3px solid #ccc; border-bottom: 3px solid #ccc;
} }
figcaption > ul,
nav ul { nav ul {
list-style-type: none; list-style-type: none;
margin: 0; margin: 0;
padding: 0; padding: 0;
} }
figcaption > ul {
display:none;
text-align: right;
}
figcaption ul li {
display: inline-block;
}
nav li { nav li {
display: inline-block; display: inline-block;
padding: 0 0.6em 0 0; padding: 0 0.6em 0 0;