- now checking images against the Google Vision API (why Google, despite my dislike of Google, the company?): the Vision API is the only one that is simple enough to use, and its labelling is reasonable.

- checking text against the Google Natural Language API: the strict classification it offers is better than a free folksonomy if I ever want to connect entries based on topic. Unfortunately, it doesn't support Hungarian yet.
2018-12-11 14:06:18 +00:00 · 2018-12-11 14:06:18 +00:00 · 26c6ef77ed
commit 26c6ef77ed
parent 033a00db8e
7 changed files with 150 additions and 36 deletions
--- a/.gitignore
+++ b/.gitignore
@ -5,4 +5,3 @@ keys.py
 lib
 gcloud.json
 tests/.Exif.tests.jpg.json
 Pipfile.lock
--- a/exiftool.py
+++ b/exiftool.py
@ -10,6 +10,7 @@ import json
 import os
 import keys
 import requests
 import logging
 from pprint import pprint
@ -58,6 +59,38 @@ class CachedMeta(dict):
            for k, v in data.items():
                self[k] = v
 class GoogleClassifyText(CachedMeta):
    """Content categories of a text, as reported by Google's classifyText.

    The instance is a mapping of category name to the confidence value
    returned by the Natural Language API.

    fpath -- path of the source file the text belongs to (used in logs)
    txt   -- plain text sent for classification
    lang  -- language code sent along with the document, 'en' by default
    """

    def __init__(self, fpath, txt, lang='en'):
        self.fpath = fpath
        self.txt = txt
        self.lang = lang
        # _read() is inherited from CachedMeta and not visible here;
        # presumably it loads cached results or falls back to _call_tool()
        # -- confirm against the base class.
        self._read()

    def _call_tool(self):
        """Query classifyText and store every returned category on self.

        Best effort: any failure (network, invalid JSON, unexpected payload)
        is logged and leaves the mapping as it was; nothing is raised.
        """
        params = {
            "document": {
                "type": "PLAIN_TEXT",
                "content": self.txt,
                "language": self.lang,
            }
        }
        url = "https://language.googleapis.com/v1beta2/documents:classifyText?key=%s" % (
            keys.gcloud.get('key')
        )
        logging.info('calling Google classifyText')
        try:
            # The request itself is inside the try so that connection errors
            # are handled like any other failure instead of propagating.
            r = requests.post(url, json=params)
            resp = r.json()
            if 'error' in resp:
                # API-level failures arrive as a JSON "error" object with no
                # categories; surface them rather than silently storing nothing.
                logging.error(
                    'Google classifyText returned an error for %s: %s',
                    self.fpath,
                    resp['error']
                )
            for cat in resp.get('categories', []):
                self[cat.get('name')] = cat.get('confidence')
        except Exception as e:
            logging.error(
                'failed to call Google Natural Language API on: %s, reason: %s',
                self.fpath,
                e
            )
 class GoogleVision(CachedMeta):
    def __init__(self, fpath, imgurl):
        self.fpath = fpath
@ -65,49 +98,84 @@ class GoogleVision(CachedMeta):
        self._read()
    @property
-    def cntr(self):
+    def response(self):
-        curr = 0
+        if 'responses' not in self:
-        if os.path.exists('/tmp/visionapicallcounter'):
+            return {}
-            with open('/tmp/visionapicallcounter', 'rt') as f:
+        if not len(self['responses']):
-                curr = int(f.read())
+            return {}
-        curr = curr + 1
+        if 'labelAnnotations' not in self['responses'][0]:
-        with open('/tmp/visionapicallcounter', 'wt') as f:
+            return {}
-            f.write("%d" % curr)
+        return self['responses'][0]
-        return curr
+
    @property
    def tags(self):
        """Return the descriptions of all labels and web entities as a list.

        Label annotations come first, followed by web-detection entities,
        in the order they appear in the response. Entries without a
        (non-empty) 'description' are skipped instead of raising KeyError:
        web entities may carry only an id and a score.
        """
        response = self.response
        entries = list(response.get('labelAnnotations', []))
        entries.extend(response.get('webDetection', {}).get('webEntities', []))
        return [
            entry['description']
            for entry in entries
            if entry.get('description')
        ]
    @property
    def landmark(self):
        """Return the last landmark annotation as a dict, or None.

        The dict has the keys 'name', 'latitude' and 'longitude', taken from
        the annotation's description and its first location. None is
        returned when the response has no landmark annotations.
        """
        matches = self.response.get('landmarkAnnotations')
        if not matches:
            return None
        # Index instead of pop(): reading a property must not remove the
        # annotation from the stored response, otherwise a second access
        # would yield a different landmark (or None).
        match = matches[-1]
        coords = match['locations'][0]['latLng']
        return {
            'name': match['description'],
            'latitude': coords['latitude'],
            'longitude': coords['longitude']
        }
    @property
    def onlinecopies(self):
        """List the URLs of pages the web detection found this image on.

        Returns an empty list when the response has no web detection
        section or no 'pagesWithMatchingImages' entry.
        """
        web = self.response.get('webDetection', {})
        pages = web.get('pagesWithMatchingImages', [])
        return [page['url'] for page in pages]
    def _call_tool(self):
        if (self.cntr >= 500 ):
            raise ValueError('already at 500 requests!')
        params = {
-          "requests": [
+            "requests": [{
-            {
+                "image": {"source": {"imageUri": self.imgurl}},
-              "image": {
+                "features": [
-                "source": {
+                    {
-                  "imageUri": self.imgurl,
+                      "type": "LANDMARK_DETECTION",
-                }
+                    },
-              },
+                    {
-              "features": [
+                      "type": "WEB_DETECTION",
-                {
+                    },
-                  "type": "LANDMARK_DETECTION",
+                    {
-                },
+                      "type": "LABEL_DETECTION",
-                {
+                    }
-                  "type": "LABEL_DETECTION",
+                ]
-                },
+            }]
              ]
            }
          ]
        }
-        url = "https://vision.googleapis.com/v1/images:annotate?key=%s" % (keys.gcloud.get('key'))
+        url = "https://vision.googleapis.com/v1/images:annotate?key=%s" % (
            keys.gcloud.get('key')
        )
        logging.info('calling Google Vision API for %s', self.fpath)
        r = requests.post(url, json=params)
        try:
            resp = r.json()
            resp = resp['responses'][0]
            for k, v in resp.items():
                self[k] = v
        except Exception as e:
-            logging.error('failed to call Google Vision API on: %s, reason: %s', self.fpath, e)
+            logging.error(
                'failed to call Google Vision API on: %s, reason: %s',
                self.fpath,
                e
            )
 class Exif(CachedMeta):
    def __init__(self, fpath):
--- a/nasg.py
+++ b/nasg.py
@ -30,7 +30,7 @@ from emoji import UNICODE_EMOJI
 from slugify import slugify
 import requests
 from pandoc import Pandoc
-from exiftool import Exif, GoogleVision
+from meta import Exif, GoogleVision, GoogleClassifyText
 import settings
 import keys
@ -506,6 +506,16 @@ class Singular(MarkdownDoc):
            pass
        return lang
    @property
    def classification(self):
        """Return the Google text classification for this entry.

        The result is the GoogleClassifyText mapping built from the entry's
        file path, content and language. For photo entries the photography
        category is added when the classifier did not report it.
        """
        c = GoogleClassifyText(self.fpath, self.content, self.lang)
        k = '/Arts & Entertainment/Visual Art & Design/Photographic & Digital Arts'
        if self.is_photo and k not in c:
            # Numeric, like the confidences stored by GoogleClassifyText,
            # so every value in the mapping has the same type.
            c.update({
                k: 1.0
            })
        return c
    @property
    def url(self):
        return "%s/%s/" % (
@ -578,6 +588,7 @@ class Singular(MarkdownDoc):
            'review': self.review,
            'has_code': self.has_code,
            'event': self.event,
            'classification': self.classification.keys()
        }
        if (self.is_photo):
            v.update({
@ -707,7 +718,8 @@ class WebImage(object):
            'caption': self.caption,
            'exif': self.exif,
            'is_photo': self.is_photo,
-            'is_mainimg': self.is_mainimg
+            'is_mainimg': self.is_mainimg,
            'onlinecopies': self.onlinecopies
        }
    def __str__(self):
@ -717,9 +729,17 @@ class WebImage(object):
        return tmpl.render(self.tmplvars)
    @cached_property
-    def vision(self):
+    def visionapi(self):
        return GoogleVision(self.fpath, self.src)
    @property
    def onlinecopies(self):
        """Return the de-duplicated URLs of online copies hosted elsewhere.

        Every URL reported by the vision API lookup is kept unless it
        contains the site's own domain. The result is a dict keys view, so
        first-seen order is preserved and duplicates collapse.
        """
        elsewhere = {
            found: True
            for found in self.visionapi.onlinecopies
            if settings.site.get('domain') not in found
        }
        return elsewhere.keys()
    @cached_property
    def meta(self):
        """Return the Exif wrapper built from this image's file path.

        Wrapped in cached_property, so the object is presumably created once
        per instance and reused afterwards -- confirm against the decorator's
        definition, which is not visible here.
        """
        return Exif(self.fpath)
--- a/settings.py
+++ b/settings.py
@ -60,6 +60,7 @@ author = {
        'github': 'https://github.com/petermolnar',
        'instagram': 'https://www.instagram.com/petermolnarnet/',
        'twitter': 'https://twitter.com/petermolnar',
        'micro.blog': 'https://micro.blog/petermolnar',
    }
 }
--- a/templates/WebImage.j2.html
+++ b/templates/WebImage.j2.html
@ -53,6 +53,13 @@
                {{ exif.lens }}
            </dd>
        </dl>
 {% endif %}
 {% if onlinecopies|length > 1 %}
 <ul>
 {% for copy in onlinecopies %}
    <li><a href="{{ copy }}">[{{ loop.index }}]</a></li>
 {% endfor %}
 </ul>
 {% endif %}
    </figcaption>
 </figure>
--- a/templates/base.j2.html
+++ b/templates/base.j2.html
@ -218,6 +218,15 @@
                    </a>
                </dd>
                <dt>Classification</dt>
                <dd>
                    <ul>
                    {% for c in post.classification %}
                        <li>{{ c }}</li>
                    {% endfor %}
                    </ul>
                </dd>
                <dt>License</dt>
                <dd class="license">
                {% if post.licence == 'CC-BY-4.0' %}
--- a/templates/style.css
+++ b/templates/style.css
@ -133,12 +133,22 @@ input {
  border-bottom: 3px solid #ccc;
 }
 figcaption > ul,
 nav ul {
  list-style-type: none;
  margin: 0;
  padding: 0;
 }
 figcaption > ul {
  display:none;
  text-align: right;
 }
 figcaption ul li {
  display: inline-block;
 }
 nav li {
  display: inline-block;
  padding: 0 0.6em 0 0;