- now checking images against the Google Vision API (why Google, despite my dislike of Google, the company? The Vision API is the only one that is simple enough to use, and its labelling is reasonable).
- checking text against the Google Natural Language API: the strict classification it offers is better than a free folksonomy if I ever want to connect entries based on topic; unfortunately, it doesn't support Hungarian yet.
This commit is contained in:
parent
033a00db8e
commit
26c6ef77ed
7 changed files with 150 additions and 36 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -5,4 +5,3 @@ keys.py
|
||||||
lib
|
lib
|
||||||
gcloud.json
|
gcloud.json
|
||||||
tests/.Exif.tests.jpg.json
|
tests/.Exif.tests.jpg.json
|
||||||
Pipfile.lock
|
|
||||||
|
|
|
@ -10,6 +10,7 @@ import json
|
||||||
import os
|
import os
|
||||||
import keys
|
import keys
|
||||||
import requests
|
import requests
|
||||||
|
import logging
|
||||||
|
|
||||||
from pprint import pprint
|
from pprint import pprint
|
||||||
|
|
||||||
|
@ -58,6 +59,38 @@ class CachedMeta(dict):
|
||||||
for k, v in data.items():
|
for k, v in data.items():
|
||||||
self[k] = v
|
self[k] = v
|
||||||
|
|
||||||
|
class GoogleClassifyText(CachedMeta):
    """Topic classification of a text via the Google Natural Language API.

    The instance is a dict-like object: each category name returned by the
    ``documents:classifyText`` endpoint maps to its reported confidence.
    """

    def __init__(self, fpath, txt, lang='en'):
        """Store the inputs and populate the mapping through ``_read()``.

        fpath -- path of the source document; within this class it is only
                 used in log messages
        txt   -- plain text sent to the API as the document content
        lang  -- language code sent with the document (default ``'en'``)
        """
        self.fpath = fpath
        self.txt = txt
        self.lang = lang
        # NOTE(review): _read() is inherited from CachedMeta, which is not
        # fully visible here; presumably it loads a cached result or falls
        # back to _call_tool() - confirm against the base class.
        self._read()

    def _call_tool(self):
        """Call classifyText and copy the returned categories onto self.

        Problems while decoding or interpreting the response are logged,
        not raised. Errors raised by ``requests.post`` itself still
        propagate to the caller, as before.
        """
        params = {
            "document": {
                "type": "PLAIN_TEXT",
                "content": self.txt,
                "language": self.lang,
            }
        }

        url = "https://language.googleapis.com/v1beta2/documents:classifyText?key=%s" % (
            keys.gcloud.get('key')
        )
        logging.info('calling Google classifyText for %s', self.fpath)
        r = requests.post(url, json=params)
        try:
            resp = r.json()
            # An API-level failure (bad key, unsupported language, text too
            # short, ...) is presumably reported as an "error" object rather
            # than "categories"; surface it instead of silently storing
            # nothing.
            if 'error' in resp:
                logging.error(
                    'Google classifyText returned an error for: %s, reason: %s',
                    self.fpath,
                    resp['error']
                )
            for cat in resp.get('categories', []):
                self[cat.get('name')] = cat.get('confidence')
        except Exception as e:
            logging.error(
                'failed to call Google classifyText on: %s, reason: %s',
                self.fpath,
                e
            )
|
||||||
|
|
||||||
class GoogleVision(CachedMeta):
|
class GoogleVision(CachedMeta):
|
||||||
def __init__(self, fpath, imgurl):
|
def __init__(self, fpath, imgurl):
|
||||||
self.fpath = fpath
|
self.fpath = fpath
|
||||||
|
@ -65,49 +98,84 @@ class GoogleVision(CachedMeta):
|
||||||
self._read()
|
self._read()
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def cntr(self):
|
def response(self):
|
||||||
curr = 0
|
if 'responses' not in self:
|
||||||
if os.path.exists('/tmp/visionapicallcounter'):
|
return {}
|
||||||
with open('/tmp/visionapicallcounter', 'rt') as f:
|
if not len(self['responses']):
|
||||||
curr = int(f.read())
|
return {}
|
||||||
curr = curr + 1
|
if 'labelAnnotations' not in self['responses'][0]:
|
||||||
with open('/tmp/visionapicallcounter', 'wt') as f:
|
return {}
|
||||||
f.write("%d" % curr)
|
return self['responses'][0]
|
||||||
return curr
|
|
||||||
|
@property
def tags(self):
    """Return the descriptions of all label and web-entity annotations.

    Label annotations come first, followed by the web entities found
    under ``webDetection``. Entries that carry no ``description`` key are
    skipped instead of raising ``KeyError``. An empty list is returned
    when the response holds neither kind of annotation.
    """
    # read the property once; every access re-evaluates self.response
    response = self.response
    web = response.get('webDetection') or {}
    candidates = list(response.get('labelAnnotations') or [])
    candidates.extend(web.get('webEntities') or [])
    return [
        entry['description']
        for entry in candidates
        if 'description' in entry
    ]
|
||||||
|
|
||||||
|
@property
def landmark(self):
    """Return the last landmark annotation as a small dict, or None.

    The result has the keys ``name``, ``latitude`` and ``longitude``,
    taken from the annotation's ``description`` and its first location.
    ``None`` is returned when there are no landmark annotations.
    """
    matches = self.response.get('landmarkAnnotations')
    if not matches:
        return None
    # Index instead of pop(): popping removed the match from the stored
    # response, so a second access returned a different landmark or None.
    match = matches[-1]
    position = match['locations'][0]['latLng']
    return {
        'name': match['description'],
        'latitude': position['latitude'],
        'longitude': position['longitude']
    }
|
||||||
|
|
||||||
|
@property
def onlinecopies(self):
    """Return the URLs of web pages reported to contain a matching image.

    The URLs are read from ``webDetection.pagesWithMatchingImages`` of
    the response, in the order given. A missing or null ``webDetection``
    or page list yields an empty list, and entries without a ``url`` key
    are skipped rather than raising ``KeyError``.
    """
    web = self.response.get('webDetection') or {}
    pages = web.get('pagesWithMatchingImages') or []
    return [page['url'] for page in pages if 'url' in page]
|
||||||
|
|
||||||
def _call_tool(self):
|
def _call_tool(self):
|
||||||
if (self.cntr >= 500 ):
|
|
||||||
raise ValueError('already at 500 requests!')
|
|
||||||
|
|
||||||
params = {
|
params = {
|
||||||
"requests": [
|
"requests": [{
|
||||||
{
|
"image": {"source": {"imageUri": self.imgurl}},
|
||||||
"image": {
|
"features": [
|
||||||
"source": {
|
{
|
||||||
"imageUri": self.imgurl,
|
"type": "LANDMARK_DETECTION",
|
||||||
}
|
},
|
||||||
},
|
{
|
||||||
"features": [
|
"type": "WEB_DETECTION",
|
||||||
{
|
},
|
||||||
"type": "LANDMARK_DETECTION",
|
{
|
||||||
},
|
"type": "LABEL_DETECTION",
|
||||||
{
|
}
|
||||||
"type": "LABEL_DETECTION",
|
]
|
||||||
},
|
}]
|
||||||
]
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
|
|
||||||
url = "https://vision.googleapis.com/v1/images:annotate?key=%s" % (keys.gcloud.get('key'))
|
url = "https://vision.googleapis.com/v1/images:annotate?key=%s" % (
|
||||||
|
keys.gcloud.get('key')
|
||||||
|
)
|
||||||
|
logging.info('calling Google Vision API for %s', self.fpath)
|
||||||
r = requests.post(url, json=params)
|
r = requests.post(url, json=params)
|
||||||
try:
|
try:
|
||||||
resp = r.json()
|
resp = r.json()
|
||||||
resp = resp['responses'][0]
|
|
||||||
for k, v in resp.items():
|
for k, v in resp.items():
|
||||||
self[k] = v
|
self[k] = v
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error('failed to call Google Vision API on: %s, reason: %s', self.fpath, e)
|
logging.error(
|
||||||
|
'failed to call Google Vision API on: %s, reason: %s',
|
||||||
|
self.fpath,
|
||||||
|
e
|
||||||
|
)
|
||||||
|
|
||||||
class Exif(CachedMeta):
|
class Exif(CachedMeta):
|
||||||
def __init__(self, fpath):
|
def __init__(self, fpath):
|
26
nasg.py
26
nasg.py
|
@ -30,7 +30,7 @@ from emoji import UNICODE_EMOJI
|
||||||
from slugify import slugify
|
from slugify import slugify
|
||||||
import requests
|
import requests
|
||||||
from pandoc import Pandoc
|
from pandoc import Pandoc
|
||||||
from exiftool import Exif, GoogleVision
|
from meta import Exif, GoogleVision, GoogleClassifyText
|
||||||
import settings
|
import settings
|
||||||
import keys
|
import keys
|
||||||
|
|
||||||
|
@ -506,6 +506,16 @@ class Singular(MarkdownDoc):
|
||||||
pass
|
pass
|
||||||
return lang
|
return lang
|
||||||
|
|
||||||
|
@property
def classification(self):
    """Return the Google text classification of this entry.

    The result is a ``GoogleClassifyText`` mapping of category name to
    confidence, built from the entry's path, content and language. Photo
    entries always carry the photography category; it is added with full
    confidence when the classifier did not report it.
    """
    c = GoogleClassifyText(self.fpath, self.content, self.lang)
    k = '/Arts & Entertainment/Visual Art & Design/Photographic & Digital Arts'
    if self.is_photo and k not in c:
        # numeric, like the confidences stored from the API response; the
        # former string '1.0' made the values of the mapping mixed-type
        c.update({
            k: 1.0
        })
    return c
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def url(self):
|
def url(self):
|
||||||
return "%s/%s/" % (
|
return "%s/%s/" % (
|
||||||
|
@ -578,6 +588,7 @@ class Singular(MarkdownDoc):
|
||||||
'review': self.review,
|
'review': self.review,
|
||||||
'has_code': self.has_code,
|
'has_code': self.has_code,
|
||||||
'event': self.event,
|
'event': self.event,
|
||||||
|
'classification': self.classification.keys()
|
||||||
}
|
}
|
||||||
if (self.is_photo):
|
if (self.is_photo):
|
||||||
v.update({
|
v.update({
|
||||||
|
@ -707,7 +718,8 @@ class WebImage(object):
|
||||||
'caption': self.caption,
|
'caption': self.caption,
|
||||||
'exif': self.exif,
|
'exif': self.exif,
|
||||||
'is_photo': self.is_photo,
|
'is_photo': self.is_photo,
|
||||||
'is_mainimg': self.is_mainimg
|
'is_mainimg': self.is_mainimg,
|
||||||
|
'onlinecopies': self.onlinecopies
|
||||||
}
|
}
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
|
@ -717,9 +729,17 @@ class WebImage(object):
|
||||||
return tmpl.render(self.tmplvars)
|
return tmpl.render(self.tmplvars)
|
||||||
|
|
||||||
@cached_property
|
@cached_property
|
||||||
def vision(self):
|
def visionapi(self):
|
||||||
return GoogleVision(self.fpath, self.src)
|
return GoogleVision(self.fpath, self.src)
|
||||||
|
|
||||||
|
@property
def onlinecopies(self):
    """Return the de-duplicated matching-page URLs found on other sites.

    URLs reported by the vision API that contain the configured site
    domain are left out. The result is the key view of a dict, so each
    URL appears once, in first-seen order.
    """
    external = {
        url: True
        for url in self.visionapi.onlinecopies
        if settings.site.get('domain') not in url
    }
    return external.keys()
|
||||||
|
|
||||||
@cached_property
|
@cached_property
|
||||||
def meta(self):
|
def meta(self):
|
||||||
return Exif(self.fpath)
|
return Exif(self.fpath)
|
||||||
|
|
|
@ -60,6 +60,7 @@ author = {
|
||||||
'github': 'https://github.com/petermolnar',
|
'github': 'https://github.com/petermolnar',
|
||||||
'instagram': 'https://www.instagram.com/petermolnarnet/',
|
'instagram': 'https://www.instagram.com/petermolnarnet/',
|
||||||
'twitter': 'https://twitter.com/petermolnar',
|
'twitter': 'https://twitter.com/petermolnar',
|
||||||
|
'micro.blog': 'https://micro.blog/petermolnar',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -53,6 +53,13 @@
|
||||||
{{ exif.lens }}
|
{{ exif.lens }}
|
||||||
</dd>
|
</dd>
|
||||||
</dl>
|
</dl>
|
||||||
|
{% endif %}
|
||||||
|
{% if onlinecopies|length > 1 %}
|
||||||
|
<ul>
|
||||||
|
{% for copy in onlinecopies %}
|
||||||
|
<li><a href="{{ copy }}">[{{ loop.index }}]</a></li>
|
||||||
|
{% endfor %}
|
||||||
|
</ul>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
</figcaption>
|
</figcaption>
|
||||||
</figure>
|
</figure>
|
||||||
|
|
|
@ -218,6 +218,15 @@
|
||||||
</a>
|
</a>
|
||||||
</dd>
|
</dd>
|
||||||
|
|
||||||
|
<dt>Classification</dt>
|
||||||
|
<dd>
|
||||||
|
<ul>
|
||||||
|
{% for c in post.classification %}
|
||||||
|
<li>{{ c }}</li>
|
||||||
|
{% endfor %}
|
||||||
|
</ul>
|
||||||
|
</dd>
|
||||||
|
|
||||||
<dt>License</dt>
|
<dt>License</dt>
|
||||||
<dd class="license">
|
<dd class="license">
|
||||||
{% if post.licence == 'CC-BY-4.0' %}
|
{% if post.licence == 'CC-BY-4.0' %}
|
||||||
|
|
|
@ -133,12 +133,22 @@ input {
|
||||||
border-bottom: 3px solid #ccc;
|
border-bottom: 3px solid #ccc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
figcaption > ul,
|
||||||
nav ul {
|
nav ul {
|
||||||
list-style-type: none;
|
list-style-type: none;
|
||||||
margin: 0;
|
margin: 0;
|
||||||
padding: 0;
|
padding: 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
figcaption > ul {
|
||||||
|
display:none;
|
||||||
|
text-align: right;
|
||||||
|
}
|
||||||
|
|
||||||
|
figcaption ul li {
|
||||||
|
display: inline-block;
|
||||||
|
}
|
||||||
|
|
||||||
nav li {
|
nav li {
|
||||||
display: inline-block;
|
display: inline-block;
|
||||||
padding: 0 0.6em 0 0;
|
padding: 0 0.6em 0 0;
|
||||||
|
|
Loading…
Reference in a new issue