From bea3794f0aff2139ec3566914a5496bb6ee38cb3 Mon Sep 17 00:00:00 2001 From: Pascal Jufer Date: Fri, 6 Aug 2021 01:19:18 +0200 Subject: [PATCH] Switch to exifr for image metadata extraction (#6922) * Switch to exifr for image metadata extraction * Fix migrations on pg * Prevent double divider Co-authored-by: rijkvanzanten --- api/package.json | 3 +- ...210805B-change-image-metadata-structure.ts | 94 +++++++++++++++++++ api/src/services/files.ts | 49 ++++------ api/src/types/shims.d.ts | 10 -- api/src/utils/parse-iptc.ts | 51 ---------- app/src/lang/translations/en-US.yaml | 2 +- .../components/file-info-sidebar-detail.vue | 31 +++--- changelog.md | 87 +++++++++++------ package-lock.json | 35 ++----- 9 files changed, 204 insertions(+), 158 deletions(-) create mode 100644 api/src/database/migrations/20210805B-change-image-metadata-structure.ts delete mode 100644 api/src/utils/parse-iptc.ts diff --git a/api/package.json b/api/package.json index 434ed2d01f..cb365eb377 100644 --- a/api/package.json +++ b/api/package.json @@ -98,14 +98,13 @@ "dotenv": "^10.0.0", "eventemitter2": "^6.4.3", "execa": "^5.1.1", - "exif-reader": "^1.0.3", + "exifr": "^7.1.2", "express": "^4.17.1", "express-session": "^1.17.2", "fs-extra": "^10.0.0", "grant": "^5.4.14", "graphql": "^15.5.0", "graphql-compose": "^9.0.1", - "icc": "^2.0.0", "inquirer": "^8.1.1", "joi": "^17.3.0", "js-yaml": "^4.1.0", diff --git a/api/src/database/migrations/20210805B-change-image-metadata-structure.ts b/api/src/database/migrations/20210805B-change-image-metadata-structure.ts new file mode 100644 index 0000000000..952b4db42b --- /dev/null +++ b/api/src/database/migrations/20210805B-change-image-metadata-structure.ts @@ -0,0 +1,94 @@ +import { Knex } from 'knex'; + +// Change image metadata structure to match the output from 'exifr' +export async function up(knex: Knex): Promise { + const files = await knex + .select<{ id: number; metadata: string }[]>('id', 'metadata') + .from('directus_files') + 
.whereNotNull('metadata'); + + for (const { id, metadata } of files) { + let prevMetadata; + + try { + prevMetadata = JSON.parse(metadata); + } catch { + continue; + } + + // Update only required if metadata has 'exif' data + if (prevMetadata.exif) { + // Get all data from 'exif' and rename the following keys: + // - 'image' to 'ifd0' + // - 'thumbnail' to 'ifd1' + // - 'interoperability' to 'interop' + const newMetadata = prevMetadata.exif; + + if (newMetadata.image) { + newMetadata.ifd0 = newMetadata.image; + delete newMetadata.image; + } + if (newMetadata.thumbnail) { + newMetadata.ifd1 = newMetadata.thumbnail; + delete newMetadata.thumbnail; + } + if (newMetadata.interoperability) { + newMetadata.interop = newMetadata.interoperability; + delete newMetadata.interoperability; + } + if (prevMetadata.icc) { + newMetadata.icc = prevMetadata.icc; + } + if (prevMetadata.iptc) { + newMetadata.iptc = prevMetadata.iptc; + } + + await knex('directus_files') + .update({ metadata: JSON.stringify(newMetadata) }) + .where({ id }); + } + } +} + +export async function down(knex: Knex): Promise { + const files = await knex + .select<{ id: number; metadata: string }[]>('id', 'metadata') + .from('directus_files') + .whereNotNull('metadata') + .whereNot('metadata', '{}'); + + for (const { id, metadata } of files) { + const prevMetadata = JSON.parse(metadata); + + // Update only required if metadata has keys other than 'icc' and 'iptc' + if (Object.keys(prevMetadata).filter((key) => key !== 'icc' && key !== 'iptc').length > 0) { + // Put all data under 'exif' and rename/move keys afterwards + const newMetadata: { exif: Record; icc?: unknown; iptc?: unknown } = { exif: prevMetadata }; + + if (newMetadata.exif.ifd0) { + newMetadata.exif.image = newMetadata.exif.ifd0; + delete newMetadata.exif.ifd0; + } + if (newMetadata.exif.ifd1) { + newMetadata.exif.thumbnail = newMetadata.exif.ifd1; + delete newMetadata.exif.ifd1; + } + if (newMetadata.exif.interop) { + 
newMetadata.exif.interoperability = newMetadata.exif.interop; + delete newMetadata.exif.interop; + } + if (newMetadata.exif.icc) { + newMetadata.icc = newMetadata.exif.icc; + delete newMetadata.exif.icc; + } + if (newMetadata.exif.iptc) { + newMetadata.iptc = newMetadata.exif.iptc; + delete newMetadata.exif.iptc; + } + + await knex('directus_files') + .update({ metadata: JSON.stringify(newMetadata) }) + .where({ id }); + } + } +} diff --git a/api/src/services/files.ts b/api/src/services/files.ts index ecc0c086e4..80b03f0b0d 100644 --- a/api/src/services/files.ts +++ b/api/src/services/files.ts @@ -1,7 +1,6 @@ import formatTitle from '@directus/format-title'; import axios, { AxiosResponse } from 'axios'; -import parseEXIF from 'exif-reader'; -import { parse as parseICC } from 'icc'; +import exifr from 'exifr'; import { clone } from 'lodash'; import { extension } from 'mime-types'; import path from 'path'; @@ -13,7 +12,6 @@ import { ForbiddenException, ServiceUnavailableException } from '../exceptions'; import logger from '../logger'; import storage from '../storage'; import { AbstractServiceOptions, File, PrimaryKey } from '../types'; -import parseIPTC from '../utils/parse-iptc'; import { toArray } from '@directus/shared/utils'; import { ItemsService, MutationOptions } from './items'; @@ -86,37 +84,30 @@ export class FilesService extends ItemsService { payload.height = meta.height; } - payload.filesize = meta.size; payload.metadata = {}; - if (meta.icc) { - try { - payload.metadata.icc = parseICC(meta.icc); - } catch (err) { - logger.warn(`Couldn't extract ICC information from file`); - logger.warn(err); + try { + payload.metadata = await exifr.parse(buffer.content, { + icc: true, + iptc: true, + ifd1: true, + interop: true, + translateValues: true, + reviveValues: true, + mergeOutput: false, + }); + if (payload.metadata?.iptc?.Headline) { + payload.title = payload.metadata.iptc.Headline; } - } - - if (meta.exif) { - try { - payload.metadata.exif = 
parseEXIF(meta.exif); - } catch (err) { - logger.warn(`Couldn't extract EXIF information from file`); - logger.warn(err); + if (!payload.description && payload.metadata?.iptc?.Caption) { + payload.description = payload.metadata.iptc.Caption; } - } - - if (meta.iptc) { - try { - payload.metadata.iptc = parseIPTC(meta.iptc); - payload.title = payload.metadata.iptc.headline || payload.title; - payload.description = payload.description || payload.metadata.iptc.caption; - payload.tags = payload.metadata.iptc.keywords; - } catch (err) { - logger.warn(`Couldn't extract IPTC information from file`); - logger.warn(err); + if (payload.metadata?.iptc?.Keywords) { + payload.tags = payload.metadata.iptc.Keywords; } + } catch (err) { + logger.warn(`Couldn't extract metadata from file`); + logger.warn(err); } } diff --git a/api/src/types/shims.d.ts b/api/src/types/shims.d.ts index 98fda04c9c..4ed0eeb3e9 100644 --- a/api/src/types/shims.d.ts +++ b/api/src/types/shims.d.ts @@ -3,16 +3,6 @@ declare module 'grant' { export default grant; } -declare module 'icc' { - const parse: (buf: Buffer) => Record; - export { parse }; -} - -declare module 'exif-reader' { - const exifReader: (buf: Buffer) => Record; - export default exifReader; -} - declare module 'pino-http' { import PinoHttp from '@types/pino-http'; const pinoHttp: PinoHttp; diff --git a/api/src/utils/parse-iptc.ts b/api/src/utils/parse-iptc.ts deleted file mode 100644 index 054ef007e3..0000000000 --- a/api/src/utils/parse-iptc.ts +++ /dev/null @@ -1,51 +0,0 @@ -const IPTC_ENTRY_TYPES = new Map([ - [0x78, 'caption'], - [0x6e, 'credit'], - [0x19, 'keywords'], - [0x37, 'dateCreated'], - [0x50, 'byline'], - [0x55, 'bylineTitle'], - [0x7a, 'captionWriter'], - [0x69, 'headline'], - [0x74, 'copyright'], - [0x0f, 'category'], -]); - -const IPTC_ENTRY_MARKER = Buffer.from([0x1c, 0x02]); - -export default function parseIPTC(buffer: Buffer): Record { - if (!Buffer.isBuffer(buffer)) return {}; - - const iptc: Record = {}; - let 
lastIptcEntryPos = buffer.indexOf(IPTC_ENTRY_MARKER); - - while (lastIptcEntryPos !== -1) { - lastIptcEntryPos = buffer.indexOf(IPTC_ENTRY_MARKER, lastIptcEntryPos + IPTC_ENTRY_MARKER.byteLength); - - const iptcBlockTypePos = lastIptcEntryPos + IPTC_ENTRY_MARKER.byteLength; - const iptcBlockSizePos = iptcBlockTypePos + 1; - const iptcBlockDataPos = iptcBlockSizePos + 2; - - const iptcBlockType = buffer.readUInt8(iptcBlockTypePos); - const iptcBlockSize = buffer.readUInt16BE(iptcBlockSizePos); - - if (!IPTC_ENTRY_TYPES.has(iptcBlockType)) { - continue; - } - - const iptcBlockTypeId = IPTC_ENTRY_TYPES.get(iptcBlockType); - const iptcData = buffer.slice(iptcBlockDataPos, iptcBlockDataPos + iptcBlockSize).toString(); - - if (iptcBlockTypeId) { - if (iptc[iptcBlockTypeId] == null) { - iptc[iptcBlockTypeId] = iptcData; - } else if (Array.isArray(iptc[iptcBlockTypeId])) { - iptc[iptcBlockTypeId].push(iptcData); - } else { - iptc[iptcBlockTypeId] = [iptc[iptcBlockTypeId], iptcData]; - } - } - } - - return iptc; -} diff --git a/app/src/lang/translations/en-US.yaml b/app/src/lang/translations/en-US.yaml index f9c88c5db6..1ed848dff6 100644 --- a/app/src/lang/translations/en-US.yaml +++ b/app/src/lang/translations/en-US.yaml @@ -408,7 +408,7 @@ documentation: Documentation sidebar: Sidebar duration: Duration charset: Charset -second: Second +second: second file_moved: File Moved collection_created: Collection Created modified_on: Modified On diff --git a/app/src/modules/files/components/file-info-sidebar-detail.vue b/app/src/modules/files/components/file-info-sidebar-detail.vue index 9816cdbf21..b2cd7187ab 100644 --- a/app/src/modules/files/components/file-info-sidebar-detail.vue +++ b/app/src/modules/files/components/file-info-sidebar-detail.vue @@ -80,32 +80,41 @@ -