8000 feat: allow direct pdf archives by fmartingr · Pull Request #942 · go-shiori/shiori · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

feat: allow direct pdf archives #942

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 24 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/swagger/docs.go
Original file line number Diff line number Diff line change
Expand Up @@ -548,6 +548,12 @@ const docTemplate = `{
"model.BookmarkDTO": {
"type": "object",
"properties": {
"archivePath": {
"type": "string"
},
"archiver": {
"type": "string"
},
"author": {
"type": "string"
},
Expand Down
6 changes: 6 additions & 0 deletions docs/swagger/swagger.json
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,12 @@
"model.BookmarkDTO": {
"type": "object",
"properties": {
"archivePath": {
"type": "string"
},
"archiver": {
"type": "string"
},
"author": {
"type": "string"
},
Expand Down
4 changes: 4 additions & 0 deletions docs/swagger/swagger.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,10 @@ definitions:
type: object
model.BookmarkDTO:
properties:
archivePath:
type: string
archiver:
type: string
author:
type: string
create_archive:
Expand Down
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ require (
github.com/go-sql-driver/mysql v1.8.1
github.com/gofrs/uuid/v5 v5.3.0
github.com/golang-jwt/jwt/v5 v5.2.1
github.com/huandu/go-sqlbuilder v1.30.1
github.com/jmoiron/sqlx v1.4.0
github.com/julienschmidt/httprouter v1.3.0
github.com/lib/pq v1.10.9
Expand Down Expand Up @@ -92,6 +93,7 @@ require (
github.com/gogs/chardet v0.0.0-20211120154057-b7413eaefb8f // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect
github.com/huandu/xstrings v1.4.0 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
Expand Down
6 changes: 6 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,12 @@ github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 h1:YBftPWNWd4WwGqtY2yeZL2ef8rH
github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0/go.mod h1:YN5jB8ie0yfIUg6VvR9Kz84aCaG7AsGZnLjhHbUqwPg=
github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
github.com/huandu/go-assert v1.1.6 h1:oaAfYxq9KNDi9qswn/6aE0EydfxSa+tWZC1KabNitYs=
github.com/huandu/go-assert v1.1.6/go.mod h1:JuIfbmYG9ykwvuxoJ3V8TB5QP+3+ajIA54Y44TmkMxs=
github.com/huandu/go-sqlbuilder v1.30.1 h1:rsneJuMBZcGpxK6YQcVtKclhFT0wbM2gmOqlTXaQc2w=
github.com/huandu/go-sqlbuilder v1.30.1/go.mod h1:mS0GAtrtW+XL6nM2/gXHRJax2RwSW1TraavWDFAc1JA=
github.com/huandu/xstrings v1.4.0 h1:D17IlohoQq4UcpqD7fDk80P7l+lwAmlFaBHgOipl2FU=
github.com/huandu/xstrings v1.4.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/jmoiron/sqlx v1.4.0 h1:1PLqN7S1UYp5t4SrVVnt4nUVNemrDAtxlulVe+Qgm3o=
Expand Down
57 changes: 57 additions & 0 deletions internal/archiver/pdf.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
package archiver

import (
"fmt"
"strings"

"github.com/go-shiori/shiori/internal/dependencies"
"github.com/go-shiori/shiori/internal/model"
)

// PDFArchiver archives bookmarks whose downloaded content is a PDF document by
// storing the raw content through the storage domain.
type PDFArchiver struct {
	// deps provides access to the shared application dependencies.
	deps *dependencies.Dependencies
}

// Matches reports whether the request's content type contains the PDF media
// type, i.e. whether this archiver should handle the request.
func (a *PDFArchiver) Matches(archiverReq *model.ArchiverRequest) bool {
	const pdfMediaType = "application/pdf"

	contentType := archiverReq.ContentType
	return strings.Contains(contentType, pdfMediaType)
}

// Archive writes the request's raw content to the bookmark's archive path and
// marks the bookmark as archived by the PDF archiver.
//
// The returned pointer refers to the bookmark embedded in archiverReq (it is not
// a copy), so the updated fields are also visible through the request. When the
// write fails the bookmark fields are left unmodified and the storage error is
// returned wrapped.
func (a *PDFArchiver) Archive(archiverReq *model.ArchiverRequest) (*model.BookmarkDTO, error) {
	bookmark := &archiverReq.Bookmark

	// Resolve the destination once so the path written to and the path recorded
	// on the bookmark cannot diverge.
	archivePath := model.GetArchivePath(bookmark)

	if err := a.deps.Domains.Storage.WriteData(archivePath, archiverReq.Content); err != nil {
		// Wrap with %w so callers can inspect the cause via errors.Is/errors.As.
		return nil, fmt.Errorf("error saving pdf archive: %w", err)
	}

	bookmark.ArchivePath = archivePath
	bookmark.HasArchive = true
	bookmark.Archiver = model.ArchiverPDF

	return bookmark, nil
}

// GetArchiveFile opens the stored PDF archive of bookmark and returns it as an
// archive file with content type "application/pdf" and the size reported by
// Stat.
//
// resourcePath is not used by this archiver. An error is returned when the
// archive does not exist in storage, cannot be opened, or its file info cannot
// be read.
//
// NOTE(review): on success the open file is handed to the returned ArchiveFile;
// presumably its consumer is responsible for closing it — confirm.
func (a *PDFArchiver) GetArchiveFile(bookmark model.BookmarkDTO, resourcePath string) (*model.ArchiveFile, error) {
	archivePath := model.GetArchivePath(&bookmark)

	if !a.deps.Domains.Storage.FileExists(archivePath) {
		return nil, fmt.Errorf("archive for bookmark %d doesn't exist", bookmark.ID)
	}

	archiveFile, err := a.deps.Domains.Storage.FS().Open(archivePath)
	if err != nil {
		return nil, fmt.Errorf("error opening pdf archive: %w", err)
	}

	info, err := archiveFile.Stat()
	if err != nil {
		// The handle is not returned on this path, so release it here. The Stat
		// error is the one worth reporting, so a Close error is dropped.
		_ = archiveFile.Close()
		return nil, fmt.Errorf("error getting pdf archive info: %w", err)
	}

	return model.NewArchiveFile(archiveFile, "application/pdf", "", info.Size()), nil
}

// NewPDFArchiver builds a PDFArchiver backed by the given dependencies.
func NewPDFArchiver(deps *dependencies.Dependencies) *PDFArchiver {
	archiver := new(PDFArchiver)
	archiver.deps = deps
	return archiver
}
78 changes: 78 additions & 0 deletions internal/archiver/warc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
package archiver

import (
"bytes"
"fmt"
"path/filepath"
"strings"

"github.com/go-shiori/shiori/internal/core"
"github.com/go-shiori/shiori/internal/dependencies"
"github.com/go-shiori/shiori/internal/model"
"github.com/go-shiori/warc"
)

// LEGACY WARNING
// This file contains legacy code that will be removed once we move on to Obelisk as
// general archiver.

// WARCArchiver is the legacy archiver that processes bookmarks through the core
// bookmark pipeline and serves resources out of WARC files.
type WARCArchiver struct {
	// deps provides access to the shared application dependencies.
	deps *dependencies.Dependencies
}

// Matches reports whether this archiver handles the given request.
//
// TODO: it currently accepts every request, acting as the catch-all archiver;
// this archiver is planned to be removed soon.
func (a *WARCArchiver) Matches(archiverReq *model.ArchiverRequest) bool {
	const catchAll = true
	return catchAll
}

// Archive runs the legacy core bookmark processing for the request and returns
// the resulting bookmark.
//
// Only errors flagged as fatal by core.ProcessBookmark are returned (wrapped).
// A non-fatal error is intentionally not propagated: the processed bookmark is
// returned with a nil error in that case.
func (a *WARCArchiver) Archive(archiverReq *model.ArchiverRequest) (*model.BookmarkDTO, error) {
	processRequest := core.ProcessRequest{
		DataDir:     a.deps.Config.Storage.DataDir,
		Bookmark:    archiverReq.Bookmark,
		Content:     bytes.NewReader(archiverReq.Content),
		ContentType: archiverReq.ContentType,
	}

	result, isFatalErr, err := core.ProcessBookmark(a.deps, processRequest)
	if err != nil && isFatalErr {
		// Wrap with %w so callers can inspect the cause via errors.Is/errors.As.
		return nil, fmt.Errorf("failed to process: %w", err)
	}

	return &result, nil
}

// GetArchiveFile reads the resource identified by resourcePath out of the
// bookmark's WARC archive and returns it as an archive file carrying the
// resource's content type, the "gzip" encoding marker and the content length.
//
// An error is returned when the archive is missing from storage, the WARC file
// cannot be opened, the resource is not part of the archive, or reading the
// resource fails.
func (a *WARCArchiver) GetArchiveFile(bookmark model.BookmarkDTO, resourcePath string) (*model.ArchiveFile, error) {
	relPath := model.GetArchivePath(&bookmark)
	if exists := a.deps.Domains.Storage.FileExists(relPath); !exists {
		return nil, fmt.Errorf("archive for bookmark %d doesn't exist", bookmark.ID)
	}

	// The WARC library opens the file from disk, so join the path with the
	// configured data directory.
	fullPath := filepath.Join(a.deps.Config.Storage.DataDir, relPath)
	archive, err := warc.Open(fullPath)
	if err != nil {
		return nil, fmt.Errorf("error opening warc file: %w", err)
	}
	defer archive.Close()

	if found := archive.HasResource(resourcePath); !found {
		return nil, fmt.Errorf("resource %s doesn't exist in archive", resourcePath)
	}

	body, mimeType, err := archive.Read(resourcePath)
	if err != nil {
		return nil, fmt.Errorf("error reading resource %s: %w", resourcePath, err)
	}

	// Note: the reader is deliberately built with strings.NewReader rather than
	// `bytes.NewReader`, because the latter crashes the moment the content is
	// retrieved for some reason. As this is a legacy archiver no more time is
	// being spent on it. (@fmartingr)
	size := int64(len(body))
	reader := strings.NewReader(string(body))
	return model.NewArchiveFile(reader, mimeType, "gzip", size), nil
}

// NewWARCArchiver builds a WARCArchiver backed by the given dependencies.
func NewWARCArchiver(deps *dependencies.Dependencies) *WARCArchiver {
	archiver := new(WARCArchiver)
	archiver.deps = deps
	return archiver
}
38 changes: 11 additions & 27 deletions internal/cmd/add.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ func addCmd() *cobra.Command {
}

func addHandler(cmd *cobra.Command, args []string) {
cfg, deps := initShiori(cmd.Context(), cmd)
_, deps := initShiori(cmd.Context(), cmd)

// Read flag and arguments
url := args[0]
Expand All @@ -38,7 +38,6 @@ func addHandler(cmd *cobra.Command, args []string) {
tags, _ := cmd.Flags().GetStringSlice("tags")
offline, _ := cmd.Flags().GetBool("offline")
noArchival, _ := cmd.Flags().GetBool("no-archival")
logArchival, _ := cmd.Flags().GetBool("log-archival")

// Normalize input
title = validateTitle(title, "")
Expand Down Expand Up @@ -84,37 +83,22 @@ func addHandler(cmd *cobra.Command, args []string) {
if !offline {
cInfo.Println("Downloading article...")

var isFatalErr bool
content, contentType, err := core.DownloadBookmark(book.URL)
result, err := deps.Domains.Archiver.GenerateBookmarkArchive(book)
if err != nil {
cError.Printf("Failed to download: %v\n", err)
cError.Printf("Failed to download article: %v\n", err)
os.Exit(1)
}

if title != "" {
result.Title = title
}

if err == nil && content != nil {
request := core.ProcessRequest{
DataDir: cfg.Storage.DataDir,
Bookmark: book,
Content: content,
ContentType: contentType,
LogArchival: logArchival,
KeepTitle: title != "",
KeepExcerpt: excerpt != "",
}

book, isFatalErr, err = core.ProcessBookmark(deps, request)
content.Close()

if err != nil {
cError.Printf("Failed: %v\n", err)
}

if isFatalErr {
os.Exit(1)
}
if excerpt != "" {
result.Excerpt = excerpt
}

// Save bookmark to database
_, err = deps.Database.SaveBookmarks(cmd.Context(), false, book)
_, err = deps.Database.SaveBookmarks(cmd.Context(), false, *result)
if err != nil {
cError.Printf("Failed to save bookmark with content: %v\n", err)
os.Exit(1)
Expand Down
20 changes: 6 additions & 14 deletions internal/core/ebook.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"os"
fp "path/filepath"
"strconv"
"strings"

epub "github.com/go-shiori/go-epub"
"github.com/go-shiori/shiori/internal/dependencies"
Expand All @@ -15,35 +14,28 @@ import (
// GenerateEbook receives a `ProcessRequest` and generates an ebook file in the destination path specified.
// The destination path `dstPath` should include file name with ".epub" extension
// The bookmark model will be used to update the UI based on whether this function is successful or not.
func GenerateEbook(deps *dependencies.Dependencies, req ProcessRequest, dstPath string) (book model.BookmarkDTO, err error) {
func GenerateEbook(deps *dependencies.Dependencies, req model.EbookProcessRequest) (book model.BookmarkDTO, err error) {
book = req.Bookmark
dstPath := model.GetEbookPath(&book)

// Make sure bookmark ID is defined
if book.ID == 0 {
return book, errors.New("bookmark ID is not valid")
}

if deps.Domains.Storage.FileExists(dstPath) && req.SkipExisting {
return book, nil
}

// Get the current state of the bookmark: check archive and thumbnail
strID := strconv.Itoa(book.ID)

bookmarkThumbnailPath := model.GetThumbnailPath(&book)
bookmarkArchivePath := model.GetArchivePath(&book)

if deps.Domains.Storage.FileExists(bookmarkThumbnailPath) {
book.ImageURL = fp.Join("/", "bookmark", strID, "thumb")
}

if deps.Domains.Storage.FileExists(bookmarkArchivePath) {
book.HasArchive = true
}

// This function create ebook from reader mode of bookmark so
// we can't create ebook from PDF so we return error here if bookmark is a pdf
contentType := req.ContentType
if strings.Contains(contentType, "application/pdf") {
return book, errors.New("can't create ebook for pdf")
}

// Create temporary epub file
tmpFile, err := os.CreateTemp("", "ebook")
if err != nil {
Expand Down
Loading
Loading
0