- now checking images against the Google Vision API (why Google, despite my dislike of Google, the company? The Vision API is the only one that is simple enough to use, and its labelling is reasonable).
- checking text against the Google Natural Language API: the strict classification it offers is better than a free folksonomy if I ever want to connect entries based on topic. Unfortunately, it doesn't support Hungarian yet.
This commit is contained in:
parent
033a00db8e
commit
26c6ef77ed
7 changed files with 150 additions and 36 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -5,4 +5,3 @@ keys.py
|
|||
lib
|
||||
gcloud.json
|
||||
tests/.Exif.tests.jpg.json
|
||||
Pipfile.lock
|
||||
|
|
|
@ -10,6 +10,7 @@ import json
|
|||
import os
|
||||
import keys
|
||||
import requests
|
||||
import logging
|
||||
|
||||
from pprint import pprint
|
||||
|
||||
|
@ -58,6 +59,38 @@ class CachedMeta(dict):
|
|||
for k, v in data.items():
|
||||
self[k] = v
|
||||
|
||||
class GoogleClassifyText(CachedMeta):
    """Topic categories for a text, from Google's Natural Language API.

    After a successful call the instance maps each category name returned
    by the ``classifyText`` endpoint to the confidence reported for it.
    """

    def __init__(self, fpath, txt, lang='en'):
        """Store the inputs and trigger the inherited ``_read()``.

        fpath -- path of the entry being classified; used in log messages
        txt   -- plain text sent to the API
        lang  -- language code sent along with the text (default ``'en'``)
        """
        self.fpath = fpath
        self.txt = txt
        self.lang = lang
        # NOTE(review): _read() comes from CachedMeta, which is not visible
        # here; presumably it loads a cached result or falls back to
        # _call_tool() -- confirm against the base class.
        self._read()

    def _call_tool(self):
        """Call ``documents:classifyText`` and store the categories on self.

        Errors reported by the API and unparsable responses are logged, not
        raised; in those cases no category is stored.
        """
        params = {
            "document": {
                "type": "PLAIN_TEXT",
                "content": self.txt,
                "language": self.lang,
            }
        }

        url = "https://language.googleapis.com/v1beta2/documents:classifyText?key=%s" % (
            keys.gcloud.get('key')
        )
        logging.info('calling Google classifyText')
        r = requests.post(url, json=params)
        try:
            resp = r.json()
            # Google APIs report failures (bad key, unsupported language,
            # text too short, ...) as an "error" member instead of results;
            # surface it rather than silently storing nothing.
            if 'error' in resp:
                logging.error(
                    'Google classifyText returned an error for: %s, reason: %s',
                    self.fpath,
                    resp['error']
                )
            for cat in resp.get('categories', []):
                self[cat.get('name')] = cat.get('confidence')
        except Exception as e:
            logging.error(
                'failed to call Google classifyText on: %s, reason: %s',
                self.fpath,
                e
            )
|
||||
|
||||
class GoogleVision(CachedMeta):
|
||||
def __init__(self, fpath, imgurl):
|
||||
self.fpath = fpath
|
||||
|
@ -65,49 +98,84 @@ class GoogleVision(CachedMeta):
|
|||
self._read()
|
||||
|
||||
@property
def cntr(self):
    """Increment the on-disk Vision API call counter and return the new value.

    The counter is kept as a decimal number in /tmp/visionapicallcounter.
    Note that merely reading this property bumps the counter by one.
    """
    counter_file = '/tmp/visionapicallcounter'

    previous = 0
    if os.path.exists(counter_file):
        with open(counter_file, 'rt') as src:
            previous = int(src.read())

    bumped = previous + 1
    with open(counter_file, 'wt') as dst:
        dst.write("%d" % bumped)
    return bumped
|
||||
@property
def response(self):
    """Return the first stored Vision API response, or {} if unusable.

    An empty dict is returned when there is no 'responses' entry, when it
    is empty, or when its first element carries no 'labelAnnotations'.

    Exposed as a property because the sibling accessors (tags, landmark,
    onlinecopies) all read it attribute-style, e.g. ``self.response[...]``.
    """
    if 'responses' not in self:
        return {}
    if not self['responses']:
        return {}
    first = self['responses'][0]
    if 'labelAnnotations' not in first:
        return {}
    return first
|
||||
|
||||
@property
def tags(self):
    """Return the descriptions of all label annotations and web entities.

    Labels come first, then web entities, each in the order stored in the
    response. Entries without a (non-empty) 'description' are skipped:
    the Vision API can return web entities that only carry an id and a
    score, and indexing them directly would raise KeyError.
    """
    response = self.response
    labels = response.get('labelAnnotations', [])
    entities = response.get('webDetection', {}).get('webEntities', [])
    return [
        item['description']
        for item in list(labels) + list(entities)
        if item.get('description')
    ]
|
||||
|
||||
@property
def landmark(self):
    """Return the last detected landmark as a dict, or None if there is none.

    The result has the keys 'name', 'latitude' and 'longitude', taken from
    the annotation's description and its first location.

    The annotation is read by index rather than popped: pop() removed it
    from the stored response, so a second access returned a different
    landmark (or None).
    """
    matches = self.response.get('landmarkAnnotations')
    if not matches:
        return None

    match = matches[-1]
    position = match['locations'][0]['latLng']
    return {
        'name': match['description'],
        'latitude': position['latitude'],
        'longitude': position['longitude']
    }
|
||||
|
||||
@property
def onlinecopies(self):
    """Return the URLs of the pages the response lists as showing this image.

    Gives an empty list when the response has no 'webDetection' section or
    that section has no 'pagesWithMatchingImages'.
    """
    try:
        pages = self.response['webDetection']['pagesWithMatchingImages']
    except KeyError:
        return []
    return [page['url'] for page in pages]
|
||||
|
||||
def _call_tool(self):
|
||||
if (self.cntr >= 500 ):
|
||||
raise ValueError('already at 500 requests!')
|
||||
|
||||
params = {
|
||||
"requests": [
|
||||
{
|
||||
"image": {
|
||||
"source": {
|
||||
"imageUri": self.imgurl,
|
||||
}
|
||||
},
|
||||
"features": [
|
||||
{
|
||||
"type": "LANDMARK_DETECTION",
|
||||
},
|
||||
{
|
||||
"type": "LABEL_DETECTION",
|
||||
},
|
||||
]
|
||||
}
|
||||
]
|
||||
"requests": [{
|
||||
"image": {"source": {"imageUri": self.imgurl}},
|
||||
"features": [
|
||||
{
|
||||
"type": "LANDMARK_DETECTION",
|
||||
},
|
||||
{
|
||||
"type": "WEB_DETECTION",
|
||||
},
|
||||
{
|
||||
"type": "LABEL_DETECTION",
|
||||
}
|
||||
]
|
||||
}]
|
||||
}
|
||||
|
||||
url = "https://vision.googleapis.com/v1/images:annotate?key=%s" % (keys.gcloud.get('key'))
|
||||
url = "https://vision.googleapis.com/v1/images:annotate?key=%s" % (
|
||||
keys.gcloud.get('key')
|
||||
)
|
||||
logging.info('calling Google Vision API for %s', self.fpath)
|
||||
r = requests.post(url, json=params)
|
||||
try:
|
||||
resp = r.json()
|
||||
resp = resp['responses'][0]
|
||||
for k, v in resp.items():
|
||||
self[k] = v
|
||||
except Exception as e:
|
||||
logging.error('failed to call Google Vision API on: %s, reason: %s', self.fpath, e)
|
||||
logging.error(
|
||||
'failed to call Google Vision API on: %s, reason: %s',
|
||||
self.fpath,
|
||||
e
|
||||
)
|
||||
|
||||
class Exif(CachedMeta):
|
||||
def __init__(self, fpath):
|
26
nasg.py
26
nasg.py
|
@ -30,7 +30,7 @@ from emoji import UNICODE_EMOJI
|
|||
from slugify import slugify
|
||||
import requests
|
||||
from pandoc import Pandoc
|
||||
from exiftool import Exif, GoogleVision
|
||||
from meta import Exif, GoogleVision, GoogleClassifyText
|
||||
import settings
|
||||
import keys
|
||||
|
||||
|
@ -506,6 +506,16 @@ class Singular(MarkdownDoc):
|
|||
pass
|
||||
return lang
|
||||
|
||||
@property
def classification(self):
    """Return the Google text classification of this entry's content.

    For photo entries the photography category is added, with the value
    '1.0', when the classifier did not already report it.
    """
    photo_category = '/Arts & Entertainment/Visual Art & Design/Photographic & Digital Arts'
    categories = GoogleClassifyText(self.fpath, self.content, self.lang)

    if not self.is_photo or photo_category in categories.keys():
        return categories

    categories.update({photo_category: '1.0'})
    return categories
|
||||
|
||||
@property
|
||||
def url(self):
|
||||
return "%s/%s/" % (
|
||||
|
@ -578,6 +588,7 @@ class Singular(MarkdownDoc):
|
|||
'review': self.review,
|
||||
'has_code': self.has_code,
|
||||
'event': self.event,
|
||||
'classification': self.classification.keys()
|
||||
}
|
||||
if (self.is_photo):
|
||||
v.update({
|
||||
|
@ -707,7 +718,8 @@ class WebImage(object):
|
|||
'caption': self.caption,
|
||||
'exif': self.exif,
|
||||
'is_photo': self.is_photo,
|
||||
'is_mainimg': self.is_mainimg
|
||||
'is_mainimg': self.is_mainimg,
|
||||
'onlinecopies': self.onlinecopies
|
||||
}
|
||||
|
||||
def __str__(self):
|
||||
|
@ -717,9 +729,17 @@ class WebImage(object):
|
|||
return tmpl.render(self.tmplvars)
|
||||
|
||||
@cached_property
|
||||
def vision(self):
|
||||
def visionapi(self):
|
||||
return GoogleVision(self.fpath, self.src)
|
||||
|
||||
@property
def onlinecopies(self):
    """Return the de-duplicated URLs of copies hosted outside this site.

    A URL is dropped when it contains the site's configured domain. The
    result is a dict keys view that keeps the order of first appearance.
    """
    elsewhere = dict.fromkeys(
        (
            found
            for found in self.visionapi.onlinecopies
            if settings.site.get('domain') not in found
        ),
        True
    )
    return elsewhere.keys()
|
||||
|
||||
@cached_property
|
||||
def meta(self):
|
||||
return Exif(self.fpath)
|
||||
|
|
|
@ -60,6 +60,7 @@ author = {
|
|||
'github': 'https://github.com/petermolnar',
|
||||
'instagram': 'https://www.instagram.com/petermolnarnet/',
|
||||
'twitter': 'https://twitter.com/petermolnar',
|
||||
'micro.blog': 'https://micro.blog/petermolnar',
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -53,6 +53,13 @@
|
|||
{{ exif.lens }}
|
||||
</dd>
|
||||
</dl>
|
||||
{% endif %}
|
||||
{% if onlinecopies|length > 1 %}
|
||||
<ul>
|
||||
{% for copy in onlinecopies %}
|
||||
<li><a href="{{ copy }}">[{{ loop.index }}]</a></li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
{% endif %}
|
||||
</figcaption>
|
||||
</figure>
|
||||
|
|
|
@ -218,6 +218,15 @@
|
|||
</a>
|
||||
</dd>
|
||||
|
||||
<dt>Classification</dt>
|
||||
<dd>
|
||||
<ul>
|
||||
{% for c in post.classification %}
|
||||
<li>{{ c }}</li>
|
||||
{% endfor %}
|
||||
</ul>
|
||||
</dd>
|
||||
|
||||
<dt>License</dt>
|
||||
<dd class="license">
|
||||
{% if post.licence == 'CC-BY-4.0' %}
|
||||
|
|
|
@ -133,12 +133,22 @@ input {
|
|||
border-bottom: 3px solid #ccc;
|
||||
}
|
||||
|
||||
figcaption > ul,
|
||||
nav ul {
|
||||
list-style-type: none;
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
figcaption > ul {
|
||||
display:none;
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
figcaption ul li {
|
||||
display: inline-block;
|
||||
}
|
||||
|
||||
nav li {
|
||||
display: inline-block;
|
||||
padding: 0 0.6em 0 0;
|
||||
|
|
Loading…
Reference in a new issue