Switch to exifr for image metadata extraction (#6922)

* Switch to exifr for image metadata extraction

* Fix migrations on pg

* Prevent double divider

Co-authored-by: rijkvanzanten <rijkvanzanten@me.com>
This commit is contained in:
Pascal Jufer
2021-08-06 01:19:18 +02:00
committed by GitHub
parent f9bf7853c9
commit bea3794f0a
9 changed files with 204 additions and 158 deletions

View File

@@ -0,0 +1,94 @@
import { Knex } from 'knex';
// Change image metadata structure to match the output from 'exifr'
/**
 * Converts stored image metadata in `directus_files` from the previous layout
 * (`{ exif: {...}, icc, iptc }`) to the flat layout that matches the output of 'exifr'.
 *
 * For every row with non-null metadata that contains an `exif` object:
 * - all entries of `exif` become top-level entries,
 * - `image` is renamed to `ifd0`, `thumbnail` to `ifd1` and `interoperability` to `interop`,
 * - top-level `icc` and `iptc` are carried over unchanged.
 *
 * Rows are skipped (left untouched) when the metadata cannot be parsed, is not an
 * object, or has no `exif` object.
 *
 * @param knex - Knex instance used to read and update `directus_files`.
 */
export async function up(knex: Knex): Promise<void> {
  const files = await knex
    .select<{ id: number; metadata: unknown }[]>('id', 'metadata')
    .from('directus_files')
    .whereNotNull('metadata');

  // Keys inside 'exif' that are renamed, in the order they are applied
  const renamedKeys: [from: string, to: string][] = [
    ['image', 'ifd0'],
    ['thumbnail', 'ifd1'],
    ['interoperability', 'interop'],
  ];

  for (const { id, metadata } of files) {
    let prevMetadata: unknown;

    try {
      // The column may arrive as a JSON string or as an already parsed value,
      // so only strings are run through JSON.parse
      prevMetadata = typeof metadata === 'string' ? JSON.parse(metadata) : metadata;
    } catch {
      continue;
    }

    // 'null' and scalars are valid JSON but carry nothing to migrate
    if (typeof prevMetadata !== 'object' || prevMetadata === null) continue;

    const { exif, icc, iptc } = prevMetadata as Record<string, unknown>;

    // Update only required if metadata has 'exif' data
    if (typeof exif !== 'object' || exif === null) continue;

    // Get all data from 'exif' and rename the affected keys
    const newMetadata = exif as Record<string, unknown>;

    for (const [from, to] of renamedKeys) {
      if (newMetadata[from]) {
        newMetadata[to] = newMetadata[from];
        delete newMetadata[from];
      }
    }

    if (icc) {
      newMetadata.icc = icc;
    }

    if (iptc) {
      newMetadata.iptc = iptc;
    }

    await knex('directus_files')
      .update({ metadata: JSON.stringify(newMetadata) })
      .where({ id });
  }
}
/**
 * Reverts stored image metadata in `directus_files` from the flat 'exifr' layout
 * back to the previous layout (`{ exif: {...}, icc, iptc }`).
 *
 * For every row whose metadata has keys other than `icc` and `iptc`:
 * - the whole object is placed under `exif`,
 * - `ifd0` is renamed to `image`, `ifd1` to `thumbnail` and `interop` to `interoperability`,
 * - `icc` and `iptc` are moved out of `exif` back to the top level.
 *
 * Rows are skipped (left untouched) when the metadata cannot be parsed or is not a
 * plain JSON object, so a single malformed row does not abort the rollback.
 *
 * @param knex - Knex instance used to read and update `directus_files`.
 */
export async function down(knex: Knex): Promise<void> {
  const files = await knex
    .select<{ id: number; metadata: unknown }[]>('id', 'metadata')
    .from('directus_files')
    .whereNotNull('metadata')
    .whereNot('metadata', '{}');

  // Keys inside 'exif' that are renamed back, in the order they are applied
  const renamedKeys: [from: string, to: string][] = [
    ['ifd0', 'image'],
    ['ifd1', 'thumbnail'],
    ['interop', 'interoperability'],
  ];

  for (const { id, metadata } of files) {
    let prevMetadata: unknown;

    try {
      // The column may arrive as a JSON string or as an already parsed value,
      // so only strings are run through JSON.parse
      prevMetadata = typeof metadata === 'string' ? JSON.parse(metadata) : metadata;
    } catch {
      continue;
    }

    // Only JSON objects can be restructured; 'null', scalars and arrays are left alone
    if (typeof prevMetadata !== 'object' || prevMetadata === null || Array.isArray(prevMetadata)) continue;

    const exif = prevMetadata as Record<string, unknown>;

    // Update only required if metadata has keys other than 'icc' and 'iptc'
    const hasExifData = Object.keys(exif).some((key) => key !== 'icc' && key !== 'iptc');
    if (!hasExifData) continue;

    // Put all data under 'exif' and rename/move keys afterwards
    const newMetadata: { exif: Record<string, unknown>; icc?: unknown; iptc?: unknown } = { exif };

    for (const [from, to] of renamedKeys) {
      if (exif[from]) {
        exif[to] = exif[from];
        delete exif[from];
      }
    }

    if (exif.icc) {
      newMetadata.icc = exif.icc;
      delete exif.icc;
    }

    if (exif.iptc) {
      newMetadata.iptc = exif.iptc;
      delete exif.iptc;
    }

    await knex('directus_files')
      .update({ metadata: JSON.stringify(newMetadata) })
      .where({ id });
  }
}

View File

@@ -1,7 +1,6 @@
import formatTitle from '@directus/format-title';
import axios, { AxiosResponse } from 'axios';
import parseEXIF from 'exif-reader';
import { parse as parseICC } from 'icc';
import exifr from 'exifr';
import { clone } from 'lodash';
import { extension } from 'mime-types';
import path from 'path';
@@ -13,7 +12,6 @@ import { ForbiddenException, ServiceUnavailableException } from '../exceptions';
import logger from '../logger';
import storage from '../storage';
import { AbstractServiceOptions, File, PrimaryKey } from '../types';
import parseIPTC from '../utils/parse-iptc';
import { toArray } from '@directus/shared/utils';
import { ItemsService, MutationOptions } from './items';
@@ -86,37 +84,30 @@ export class FilesService extends ItemsService {
payload.height = meta.height;
}
payload.filesize = meta.size;
payload.metadata = {};
if (meta.icc) {
try {
payload.metadata.icc = parseICC(meta.icc);
} catch (err) {
logger.warn(`Couldn't extract ICC information from file`);
logger.warn(err);
try {
payload.metadata = await exifr.parse(buffer.content, {
icc: true,
iptc: true,
ifd1: true,
interop: true,
translateValues: true,
reviveValues: true,
mergeOutput: false,
});
if (payload.metadata?.iptc?.Headline) {
payload.title = payload.metadata.iptc.Headline;
}
}
if (meta.exif) {
try {
payload.metadata.exif = parseEXIF(meta.exif);
} catch (err) {
logger.warn(`Couldn't extract EXIF information from file`);
logger.warn(err);
if (!payload.description && payload.metadata?.iptc?.Caption) {
payload.description = payload.metadata.iptc.Caption;
}
}
if (meta.iptc) {
try {
payload.metadata.iptc = parseIPTC(meta.iptc);
payload.title = payload.metadata.iptc.headline || payload.title;
payload.description = payload.description || payload.metadata.iptc.caption;
payload.tags = payload.metadata.iptc.keywords;
} catch (err) {
logger.warn(`Couldn't extract IPTC information from file`);
logger.warn(err);
if (payload.metadata?.iptc?.Keywords) {
payload.tags = payload.metadata.iptc.Keywords;
}
} catch (err) {
logger.warn(`Couldn't extract metadata from file`);
logger.warn(err);
}
}

View File

@@ -3,16 +3,6 @@ declare module 'grant' {
export default grant;
}
// Ambient typing for the 'icc' package: exposes a named `parse` function that
// takes a Buffer and returns a string-keyed record of string values.
declare module 'icc' {
const parse: (buf: Buffer) => Record<string, string>;
export { parse };
}
// Ambient typing for the 'exif-reader' package: the default export is a function
// that takes a Buffer and returns a loosely typed, string-keyed record.
declare module 'exif-reader' {
const exifReader: (buf: Buffer) => Record<string, any>;
export default exifReader;
}
declare module 'pino-http' {
import PinoHttp from '@types/pino-http';
const pinoHttp: PinoHttp;

View File

@@ -1,51 +0,0 @@
/** Maps the one-byte entry type that follows the marker to the key used in the parsed result. */
const IPTC_ENTRY_TYPES = new Map([
  [0x78, 'caption'],
  [0x6e, 'credit'],
  [0x19, 'keywords'],
  [0x37, 'dateCreated'],
  [0x50, 'byline'],
  [0x55, 'bylineTitle'],
  [0x7a, 'captionWriter'],
  [0x69, 'headline'],
  [0x74, 'copyright'],
  [0x0f, 'category'],
]);

/** Two-byte sequence that introduces every entry this parser looks for. */
const IPTC_ENTRY_MARKER = Buffer.from([0x1c, 0x02]);

/**
 * Extracts the known IPTC entries from a raw IPTC buffer.
 *
 * Each entry is laid out as: marker (2 bytes), type (1 byte), big-endian data
 * size (2 bytes), then the data itself. Entries whose type is not listed in
 * `IPTC_ENTRY_TYPES` are ignored. A type that occurs once yields a string; a
 * type that occurs several times yields an array of strings in buffer order.
 *
 * @param buffer - Raw IPTC data. Anything that is not a Buffer yields `{}`.
 * @returns Record keyed by the names from `IPTC_ENTRY_TYPES`.
 */
export default function parseIPTC(buffer: Buffer): Record<string, any> {
  if (!Buffer.isBuffer(buffer)) return {};

  const iptc: Record<string, string | string[]> = {};
  const headerSize = IPTC_ENTRY_MARKER.byteLength + 3; // marker + type byte + 2 size bytes

  let entryPos = buffer.indexOf(IPTC_ENTRY_MARKER);

  while (entryPos !== -1) {
    // A marker too close to the end has no complete header; nothing more can be read
    if (entryPos + headerSize > buffer.byteLength) break;

    const typePos = entryPos + IPTC_ENTRY_MARKER.byteLength;
    const sizePos = typePos + 1;
    const dataPos = sizePos + 2;

    const entryName = IPTC_ENTRY_TYPES.get(buffer.readUInt8(typePos));

    if (entryName !== undefined) {
      const size = buffer.readUInt16BE(sizePos);
      // slice() clamps to the buffer end, so a truncated payload yields the bytes available
      const data = buffer.slice(dataPos, dataPos + size).toString();
      const existing = iptc[entryName];

      if (existing == null) {
        iptc[entryName] = data;
      } else if (Array.isArray(existing)) {
        existing.push(data);
      } else {
        iptc[entryName] = [existing, data];
      }
    }

    // Advance only after the current entry has been handled, so the first entry is
    // not skipped and a failed search (-1) is never used as a read offset
    entryPos = buffer.indexOf(IPTC_ENTRY_MARKER, typePos);
  }

  return iptc;
}