- now checking images against the Google Vision API (why Google, despite my disdain for Google, the company? Because the Vision API is the only one that is simple enough to use, and its labelling is reasonable).

- checking text against the Google Natural Language API: the strict classification it offers is better than a free folksonomy if I ever want to connect entries based on topic. Unfortunately, it doesn't support Hungarian yet.
2018-12-11 14:06:18 +00:00 · 2018-12-11 14:06:18 +00:00 · 26c6ef77ed
commit 26c6ef77ed
parent 033a00db8e
7 changed files with 150 additions and 36 deletions
--- a/.gitignore
+++ b/.gitignore
@ -5,4 +5,3 @@ keys.py
 lib
 gcloud.json
 tests/.Exif.tests.jpg.json
-Pipfile.lock
--- a/exiftool.py
+++ b/exiftool.py
@ -10,6 +10,7 @@ import json
 import os
 import keys
 import requests
+import logging

 from pprint import pprint

@ -58,6 +59,38 @@ class CachedMeta(dict):
            for k, v in data.items():
                self[k] = v

+class GoogleClassifyText(CachedMeta):
+    def __init__(self, fpath, txt, lang='en'):
+        self.fpath = fpath
+        self.txt = txt
+        self.lang = lang
+        self._read()
+
+    def _call_tool(self):
+        params = {
+            "document": {
+                "type": "PLAIN_TEXT",
+                "content": self.txt,
+                "language": self.lang,
+            }
+        }
+
+        url = "https://language.googleapis.com/v1beta2/documents:classifyText?key=%s" % (
+            keys.gcloud.get('key')
+        )
+        logging.info('calling Google classidyText')
+        r = requests.post(url, json=params)
+        try:
+            resp = r.json()
+            for cat in resp.get('categories', []):
+                self[cat.get('name')] = cat.get('confidence')
+        except Exception as e:
+            logging.error(
+                'failed to call Google Vision API on: %s, reason: %s',
+                self.fpath,
+                e
+            )
+
 class GoogleVision(CachedMeta):
    def __init__(self, fpath, imgurl):
        self.fpath = fpath
@ -65,49 +98,84 @@ class GoogleVision(CachedMeta):
        self._read()

    @property
-    def cntr(self):
-        curr = 0
-        if os.path.exists('/tmp/visionapicallcounter'):
-            with open('/tmp/visionapicallcounter', 'rt') as f:
-                curr = int(f.read())
-        curr = curr + 1
-        with open('/tmp/visionapicallcounter', 'wt') as f:
-            f.write("%d" % curr)
-        return curr
+    def response(self):
+        if 'responses' not in self:
+            return {}
+        if not len(self['responses']):
+            return {}
+        if 'labelAnnotations' not in self['responses'][0]:
+            return {}
+        return self['responses'][0]
+
+    @property
+    def tags(self):
+        tags = []
+
+        if 'labelAnnotations' in self.response:
+            for label in self.response['labelAnnotations']:
+                tags.append(label['description'])
+
+        if 'webDetection' in self.response:
+            if 'webEntities' in self.response['webDetection']:
+                for label in self.response['webDetection']['webEntities']:
+                    tags.append(label['description'])
+        return tags
+
+    @property
+    def landmark(self):
+        landmark = None
+        if 'landmarkAnnotations' in self.response:
+            if len(self.response['landmarkAnnotations']):
+                match = self.response['landmarkAnnotations'].pop()
+                landmark = {
+                    'name': match['description'],
+                    'latitude': match['locations'][0]['latLng']['latitude'],
+                    'longitude': match['locations'][0]['latLng']['longitude']
+                }
+        return landmark
+
+    @property
+    def onlinecopies(self):
+        copies = []
+        if 'webDetection' in self.response:
+            if 'pagesWithMatchingImages' in self.response['webDetection']:
+                for match in self.response['webDetection']['pagesWithMatchingImages']:
+                    copies.append(match['url'])
+        return copies

    def _call_tool(self):
-        if (self.cntr >= 500 ):
-            raise ValueError('already at 500 requests!')
-
        params = {
-          "requests": [
-            {
-              "image": {
-                "source": {
-                  "imageUri": self.imgurl,
-                }
-              },
-              "features": [
-                {
-                  "type": "LANDMARK_DETECTION",
-                },
-                {
-                  "type": "LABEL_DETECTION",
-                },
-              ]
-            }
-          ]
+            "requests": [{
+                "image": {"source": {"imageUri": self.imgurl}},
+                "features": [
+                    {
+                      "type": "LANDMARK_DETECTION",
+                    },
+                    {
+                      "type": "WEB_DETECTION",
+                    },
+                    {
+                      "type": "LABEL_DETECTION",
+                    }
+                ]
+            }]
        }

-        url = "https://vision.googleapis.com/v1/images:annotate?key=%s" % (keys.gcloud.get('key'))
+        url = "https://vision.googleapis.com/v1/images:annotate?key=%s" % (
+            keys.gcloud.get('key')
+        )
+        logging.info('calling Google Vision API for %s', self.fpath)
        r = requests.post(url, json=params)
        try:
            resp = r.json()
-            resp = resp['responses'][0]
            for k, v in resp.items():
                self[k] = v
        except Exception as e:
-            logging.error('failed to call Google Vision API on: %s, reason: %s', self.fpath, e)
+            logging.error(
+                'failed to call Google Vision API on: %s, reason: %s',
+                self.fpath,
+                e
+            )

 class Exif(CachedMeta):
    def __init__(self, fpath):
--- a/nasg.py
+++ b/nasg.py
@ -30,7 +30,7 @@ from emoji import UNICODE_EMOJI
 from slugify import slugify
 import requests
 from pandoc import Pandoc
-from exiftool import Exif, GoogleVision
+from meta import Exif, GoogleVision, GoogleClassifyText
 import settings
 import keys

@ -506,6 +506,16 @@ class Singular(MarkdownDoc):
            pass
        return lang

+    @property
+    def classification(self):
+        c = GoogleClassifyText(self.fpath, self.content, self.lang)
+        k = '/Arts & Entertainment/Visual Art & Design/Photographic & Digital Arts'
+        if self.is_photo and k not in c.keys():
+            c.update({
+                k : '1.0'
+            })
+        return c
+
    @property
    def url(self):
        return "%s/%s/" % (
@ -578,6 +588,7 @@ class Singular(MarkdownDoc):
            'review': self.review,
            'has_code': self.has_code,
            'event': self.event,
+            'classification': self.classification.keys()
        }
        if (self.is_photo):
            v.update({
@ -707,7 +718,8 @@ class WebImage(object):
            'caption': self.caption,
            'exif': self.exif,
            'is_photo': self.is_photo,
-            'is_mainimg': self.is_mainimg
+            'is_mainimg': self.is_mainimg,
+            'onlinecopies': self.onlinecopies
        }

    def __str__(self):
@ -717,9 +729,17 @@ class WebImage(object):
        return tmpl.render(self.tmplvars)

    @cached_property
-    def vision(self):
+    def visionapi(self):
        return GoogleVision(self.fpath, self.src)

+    @property
+    def onlinecopies(self):
+        copies = {}
+        for m in self.visionapi.onlinecopies:
+            if settings.site.get('domain') not in m:
+                copies[m] = True
+        return copies.keys()
+
    @cached_property
    def meta(self):
        return Exif(self.fpath)
--- a/settings.py
+++ b/settings.py
@ -60,6 +60,7 @@ author = {
        'github': 'https://github.com/petermolnar',
        'instagram': 'https://www.instagram.com/petermolnarnet/',
        'twitter': 'https://twitter.com/petermolnar',
+        'micro.blog': 'https://micro.blog/petermolnar',
    }
 }

--- a/templates/WebImage.j2.html
+++ b/templates/WebImage.j2.html
@ -53,6 +53,13 @@
                {{ exif.lens }}
            </dd>
        </dl>
+{% endif %}
+{% if onlinecopies|length > 1 %}
+<ul>
+{% for copy in onlinecopies %}
+    <li><a href="{{ copy }}">[{{ loop.index }}]</a></li>
+{% endfor %}
+</ul>
 {% endif %}
    </figcaption>
 </figure>
--- a/templates/base.j2.html
+++ b/templates/base.j2.html
@ -218,6 +218,15 @@
                    </a>
                </dd>

+                <dt>Classification</dt>
+                <dd>
+                    <ul>
+                    {% for c in post.classification %}
+                        <li>{{ c }}</li>
+                    {% endfor %}
+                    </ul>
+                </dd>
+
                <dt>License</dt>
                <dd class="license">
                {% if post.licence == 'CC-BY-4.0' %}
--- a/templates/style.css
+++ b/templates/style.css
@ -133,12 +133,22 @@ input {
  border-bottom: 3px solid #ccc;
 }

+figcaption > ul,
 nav ul {
  list-style-type: none;
  margin: 0;
  padding: 0;
 }

+figcaption > ul {
+  display:none;
+  text-align: right;
+}
+
+figcaption ul li {
+  display: inline-block;
+}
+
 nav li {
  display: inline-block;
  padding: 0 0.6em 0 0;