diff --git a/common-lib/src/main/scala/com/gu/mediaservice/lib/metadata/ImageMetadataConverter.scala b/common-lib/src/main/scala/com/gu/mediaservice/lib/metadata/ImageMetadataConverter.scala index b967d407ab..0e73246752 100644 --- a/common-lib/src/main/scala/com/gu/mediaservice/lib/metadata/ImageMetadataConverter.scala +++ b/common-lib/src/main/scala/com/gu/mediaservice/lib/metadata/ImageMetadataConverter.scala @@ -12,6 +12,8 @@ object ImageMetadataConverter extends GridLogging { // keep positive years (ie AD) with a couple of days spare room to avoid accidentally flipping over into BC val earliestSensibleDate: DateTime = DateTime.parse("0001-01-03T00:00:00.000Z") + // offset hours representing possible range of + timezones ahead of UTC + private val utcHoursOffset: Int = 14 private def extractSubjects(fileMetadata: FileMetadata): Option[List[String]] = { val supplementalCategories = fileMetadata.iptc @@ -54,10 +56,19 @@ object ImageMetadataConverter extends GridLogging { def fromFileMetadata(fileMetadata: FileMetadata, latestAllowedDateTime: Option[DateTime] = None, earliestAllowedDateTime: Option[DateTime] = Some(earliestSensibleDate)): ImageMetadata = { def parseDate(dateString: String): Option[DateTime] = parseRandomDate(dateString, maxDate = latestAllowedDateTime, minDate = earliestAllowedDateTime) + + // setting of taken date + val exifTaken: Option[String] = fileMetadata.exifSub.get("Date/Time Original Composite") + val iptcTaken: Option[String] = fileMetadata.iptc.get("Date Time Created Composite") + val xmpTaken: Option[String] = fileMetadata.readXmpHeadStringProp("photoshop:DateCreated") + val iptcDate: Option[String] = fileMetadata.iptc.get("Date Created") + val dateTaken: Option[DateTime] = exifTaken.flatMap(parseDate) orElse + iptcTaken.flatMap(parseDate) orElse + xmpTaken.flatMap(parseDate) orElse + iptcDate.flatMap(parseDate) + ImageMetadata( - dateTaken = (fileMetadata.exifSub.get("Date/Time Original Composite") flatMap parseDate) orElse - (fileMetadata.iptc.get("Date Time Created Composite") flatMap parseDate) orElse - (fileMetadata.readXmpHeadStringProp("photoshop:DateCreated") flatMap parseDate), + dateTaken = dateTaken, description = fileMetadata.readXmpHeadStringProp("dc:description") orElse fileMetadata.iptc.get("Caption/Abstract") orElse fileMetadata.exif.get("Image Description"), @@ -138,6 +149,8 @@ object ImageMetadataConverter extends GridLogging { DateTimeFormat.forPattern("yyyyddMM"), DateTimeFormat.forPattern("yyyy"), DateTimeFormat.forPattern("yyyy-MM"), + DateTimeFormat.forPattern("yyyy:MM:dd"), + DateTimeFormat.forPattern("yyyy-MM-dd"), // 2014-12-16 - Maybe it's just a date // no timezone provided so force UTC rather than use the machine's timezone @@ -145,6 +158,8 @@ object ImageMetadataConverter extends GridLogging { ) private[metadata] def parseRandomDate(str: String, maxDate: Option[DateTime] = None, minDate: Option[DateTime] = None): Option[DateTime] = { + // account for images sent with local timezone info being interpreted as UTC + val feasibleMaxDate: Option[DateTime] = maxDate.map(_.plusHours(utcHoursOffset)) dateTimeFormatters.foldLeft[Option[DateTime]](None){ case (successfulDate@Some(_), _) => successfulDate // NB We refuse parse results which result in future dates, if a max date is provided. @@ -152,7 +167,7 @@ object ImageMetadataConverter extends GridLogging { // that it should be parsed as (eg) US (12th Jan 2021), not EU (1st Dec 2021). // So we refuse the (apparently successful) EU parse result. case (None, formatter) => safeParsing(formatter.parseDateTime(str)) - .filter(d => maxDate.forall(d.isBefore) && minDate.forall(d.isAfter)) + .filter(d => feasibleMaxDate.forall(d.isBefore) && minDate.forall(d.isAfter)) }.map(_.withZone(DateTimeZone.UTC)) }