1
0
mirror of https://github.com/cs3org/wopiserver.git synced 2025-04-18 13:04:00 +03:00

Merge remote-tracking branch 'wopibridge/master' into bridge

This commit is contained in:
Giuseppe Lo Presti 2021-06-18 22:30:39 +02:00
commit fba1bc86a8
11 changed files with 1539 additions and 0 deletions

66
.drone_bridge.yml Normal file
View File

@ -0,0 +1,66 @@
---
# Builds the bridge image and pushes it as `latest` for master-branch builds,
# skipping pull requests, tags, promotions and rollbacks.
kind: pipeline
type: docker
name: release-latest

platform:
  os: linux
  arch: amd64

trigger:
  branch:
    - master
  event:
    exclude:
      - pull_request
      - tag
      - promote
      - rollback

steps:
  - name: publish-docker-wopibridge-latest
    pull: always
    image: plugins/docker
    settings:
      repo: cs3org/wopibridge
      tags: latest
      dockerfile: wopibridge-poc.Dockerfile
      # Docker Hub credentials come from the repository secrets
      username:
        from_secret: dockerhub_username
      password:
        from_secret: dockerhub_password
      custom_dns:
        - 128.142.17.5
        - 128.142.16.5
---
# Builds the bridge image for tag events and pushes it under the tag name,
# which is also handed to the Dockerfile as the VERSION build argument.
kind: pipeline
type: docker
name: release

platform:
  os: linux
  arch: amd64

trigger:
  event:
    include:
      - tag

steps:
  - name: publish-docker-wopibridge-tag
    pull: always
    image: plugins/docker
    settings:
      repo: cs3org/wopibridge
      tags: ${DRONE_TAG}
      dockerfile: wopibridge-poc.Dockerfile
      # Docker Hub credentials come from the repository secrets
      username:
        from_secret: dockerhub_username
      password:
        from_secret: dockerhub_password
      build_args:
        - VERSION=${DRONE_TAG}
      custom_dns:
        - 128.142.17.5
        - 128.142.16.5

114
.vscode/.ropeproject/config.py vendored Normal file
View File

@ -0,0 +1,114 @@
# The default ``config.py``
# flake8: noqa
def set_prefs(prefs):
    """Fill in the rope project preferences; called before the project is opened.

    ``prefs`` only needs to support item assignment: each option listed below
    is stored as ``prefs[name] = value``, in the order given.

    Options that are left at rope's defaults (shown here for reference only):
      * ``prefs['python_files'] = ['*.py']`` - which files count as Python
        files; by default only those ending with ``.py``.
      * ``prefs.add('source_folders', 'src')`` - extra source folders, relative
        to the project root and always '/'-separated; rope usually guesses
        them correctly by itself.
      * ``prefs.add('python_path', '~/python/')`` - extend the python path used
        for looking up modules.
    """
    options = (
        # Files and folders to ignore: changes to them are not added to the
        # history and VCSs, and `Project.get_files()` does not return them.
        # ``?`` and ``*`` match all characters but slashes, e.g.:
        #   '*.pyc'       matches 'test.pyc' and 'pkg/test.pyc'
        #   'mod*.pyc'    matches 'test/mod1.pyc' but not 'mod/1.pyc'
        #   '.svn'        matches 'pkg/.svn' and all of its children
        #   'build/*.o'   matches 'build/lib.o' but not 'build/sub/lib.o'
        #   'build//*.o'  matches 'build/lib.o' and 'build/sub/lib.o'
        ('ignored_resources', ['*.pyc', '*~', '.ropeproject',
                               '.hg', '.svn', '_svn', '.git', '.tox']),
        # Whether rope saves object information, and whether it compresses it.
        ('save_objectdb', True),
        ('compress_objectdb', False),
        # Analyze each module when it is being saved.
        ('automatic_soa', True),
        # Depth of calls to follow in static object analysis.
        ('soa_followed_calls', 0),
        # When `False`, "dynamic object analysis" is turned off while running
        # modules or unit tests, which makes them much faster.
        ('perform_doa', True),
        # Let rope check the validity of its object DB when running.
        ('validate_objectdb', True),
        # Number of undos to hold.
        ('max_history_items', 32),
        # Whether history is saved across sessions, and whether compressed.
        ('save_history', True),
        ('compress_history', False),
        # Number of spaces per indent; :PEP:`8` recommends 4, which is also
        # what most of rope's unit-tests use.
        ('indent_size', 4),
        # Builtin and c-extension modules rope may import and inspect.
        ('extension_modules', []),
        # Add all standard c-extensions to the extension_modules list.
        ('import_dynload_stdmods', True),
        # If `True`, modules with syntax errors are considered empty; with the
        # default `False` they raise `rope.base.exceptions.ModuleSyntaxError`.
        ('ignore_syntax_errors', False),
        # If `True`, unresolvable imports are ignored; otherwise they appear
        # in the importing namespace.
        ('ignore_bad_imports', False),
        # If `True`, new module imports are inserted as
        # `from <package> import <module>` by default.
        ('prefer_module_from_imports', False),
        # If `True`, a comma list of imports is split into separate import
        # statements when organizing imports.
        ('split_imports', False),
        # If `True`, all top-level imports are removed and reinserted at the
        # top of the module when making changes.
        ('pull_imports_to_top', True),
        # If `True`, imports are sorted alphabetically by module name rather
        # than by import statement, with from-imports after normal imports.
        ('sort_imports_alphabetically', False),
        # Location of the implementation of
        # rope.base.oi.type_hinting.interfaces.ITypeHintingFactory. Normally
        # there is no need to change it; override it to inject your own
        # implementations of the interfaces listed in
        # rope.base.oi.type_hinting.providers.interfaces (e.g. providers for
        # Django models, or to disable type-hint lookup in class hierarchies).
        ('type_hinting_factory',
         'rope.base.oi.type_hinting.factory.default_type_hinting_factory'),
    )
    for name, value in options:
        prefs[name] = value
def project_opened(project):
    """Hook called after the project has been opened; intentionally does nothing."""
    # Project-specific setup can be added here.
    return None

85
bridge_changelog.md Normal file
View File

@ -0,0 +1,85 @@
## Changelog for the WOPI bridge
### wopibridge 4.0.0 (2021-06-14)
* Refactored code to support plugins, and
introduced a plugin for Etherpad with
minimal functionality
* Fixed public links for CodiMD
* Fixed repeated autosaving in CodiMD when
a document is left open (#11)
### wopibridge 3.2.0 (2021-04-23)
* Introduced a shared secret as an `apikey`
parameter to be passed to CodiMD
* Fixed case of double redirection
* Improved display name
### wopibridge 3.1.0 (2021-03-19)
* Fixed a number of corner cases with the CodiMD
update API and the WOPI Put/PutRelative calls
### wopibridge 3.0.0 (2021-03-05)
* Added support for the Update CodiMD API and for
deterministic noteid hashing (#7), which requires
CodiMD to have `ALLOW_FREEURL` set to true
* Added a secret file in the docker configuration,
for the noteid hashing and for the /list endpoint
* Simplified bookkeeping logic of the save thread
* Improved logging
### wopibridge 2.2.0 (2021-02-11)
* Improved logic for cleaning up current sessions
and better handling of failure scenarios
* Improved logging
### wopibridge 2.1.0 (2021-02-05)
* Reworked logic to recover when lock is missing (#4)
* Improved UI feedback messages
* Introduced a new parameter `APP_UNLOCK_INTERVAL`
to control when a file is unlocked following
a close event from all concurrent sessions
* Code further refactored and simplified
### wopibridge 2.0.0 (2020-12-14)
* Full refactoring of the code, to separate
the core server from the CodiMD-specific code
* Fixed handling of multiple collaborating users
and multiple wopibridge instances by storing
more state in the WOPI locks
### wopibridge 1.2.0 (2020-12-01)
* Better support for slides
* Improved logging and UI responses
* Added versioning
### wopibridge 1.1.0 (2020-11-11)
* Several fixes around saving and feedback to UI
* First version fully integrated in CERNBox (Canary mode)
### wopibridge 1.0.0 (2020-10-13)
* Full refactoring to achieve an MVP
* Added support for autosave
* Removed dependency on locally mounted CodiMD storage
* Added support for a custom `APP_ROOT`
### wopibridge 0.3 (2020-08-05)
* Make use of extended features of CodiMD to support
read-only mode and display name
* Introduced K8s deployment
* Automatic build with drone
### wopibridge 0.2 (2020-05-18)
* Repo moved to the CS3 Organisation
* Added support for attachments (images)
### wopibridge 0.1 (2020-04-08)
* First PoC of a WOPI bridge service for CodiMD,
including collaborative editing and WOPI-compliant locking.

39
bridge_readme.md Normal file
View File

@ -0,0 +1,39 @@
# WOPI Bridge
This is a proof-of-concept WOPI bridge server, currently only targeting CodiMD, to allow bridging it to a WOPI-enabled EFSS service like ownCloud or CERNBox. It uses the private REST API of CodiMD, which will hopefully be made public and complete in the future. The approach is generic to allow for extending the concept to other Office-like applications exposing a minimal load/save REST API.
## What works
* REST service with two endpoints:
- `/open` meant to be called by the EFSS with a WOPISrc and a WOPI access token, returns a file displayed in CodiMD
- `/save` auto-called by the CodiMD backend when some changes are detected on the open document
* Stateless server, all context stored in the WOPI lock or passed through arguments
* Readonly (publish or slide) mode vs. read/write mode
* Collaborative editing and locking of the file
* Transparent handling of uploads (i.e. pictures):
* If a note has no pictures, it is handled as a `.md` text file
* Once a picture is included, on close the save to WOPI is executed as a zipped bundle, with a `.zmd` extension, and the previous `.md` file is removed; similarly if all pictures are removed and the file is saved back as `.md`
* Files ending in `.zmd` are equally treated as zipped bundles and expanded to CodiMD
### Required CodiMD APIs
* `/new` push a new file to a random `<noteid>`
* `/<noteid>` display a file, or reserve a `<noteid>` if not existing
* `/<noteid>/publish` display a file in readonly mode
* `/<noteid>/slide` display a file in slide mode
* `/<noteid>/download` get a raw file to store it back
* `/uploadimage` upload a new picture
* `/uploads/upload_<hash>` get an uploaded picture
* `/api/notes/<noteid>` update a file via PUT
### Required WOPI APIs
* `GetFileInfo`: get all file metadata
* `GetFile`: get the file content
* `GetLock`: check if the file is locked
* `Lock`: lock a file on open for write
* `PutFile`: store a file's content
* `PutRelative`: store a file under a different name
* `Unlock`: unlock a file on close
* `Delete`: delete a previous edition of a file
## Changelog
[Available here](bridge_changelog.md)

88
codinet.yaml Normal file
View File

@ -0,0 +1,88 @@
# docker-compose configuration file for CodiMD + Postgres + WOPI bridge
#
# Build with: docker-compose -f codinet.yaml build
# Run with: DBPWD='yourdbpassword' HOST='https://yourserver' docker-compose -f codinet.yaml up -d
#
version: "3.1"

services:
  # PostgreSQL backend used by CodiMD
  database:
    image: postgres:11.6-alpine
    container_name: codimd-postgres
    hostname: cbox-codimd-postgres
    environment:
      - POSTGRES_USER=codimd
      - POSTGRES_PASSWORD=${DBPWD}
      - POSTGRES_DB=codimd
    network_mode: codinet
    volumes:
      - "database-data:/var/lib/postgresql/data"
    restart: always

  # CodiMD web application
  frontend:
    image: gitlab-registry.cern.ch/authoring/notes/codimd:cernbox-integration
    container_name: codimd-web
    hostname: cbox-codimd-web
    environment:
      - CMD_DB_URL=postgres://codimd:${DBPWD}@codimd-postgres/codimd
      - CMD_AUTO_VERSION_CHECK=false
      - CMD_ALLOW_ANONYMOUS=true
      - CMD_ALLOW_ANONYMOUS_EDITS=true
      - CMD_ALLOW_ANONYMOUS_VIEWS=true
      # was misspelled CMS_ALLOW_FREEURL, which CodiMD ignores; the bridge's
      # deterministic note ids require free URLs to be enabled
      - CMD_ALLOW_FREEURL=true
      - CMD_EMAIL=false
      - CMD_ALLOW_EMAIL_REGISTER=false
      - CMD_USESSL=true
      - NODE_TLS_REJECT_UNAUTHORIZED=0
      - CMD_APPLIANCE_MODE=true
      # webhook called by CodiMD when changes are detected on an open document
      - CMD_SAVE_WEBHOOK=https://codimd-wopi.codinet:8000/wopib/save
    depends_on:
      - database
    ports:
      - "3000:3000"
    secrets:
      - cert.pem
      - key.pem
      - dhparam.pem
    network_mode: codinet
    restart: always

  # WOPI bridge
  wopibridge:
    build:
      context: .
      dockerfile: wopibridge-poc.Dockerfile
    image: wopibridge-poc:cern
    container_name: codimd-wopi
    hostname: cbox-codimd-wopi
    environment:
      # the CodiMD plugin reads the internal URL from CODIMD_URL (falling back
      # to CODIMD_EXT_URL); CODIMD_INT_URL is kept for images that expect it
      - CODIMD_URL=https://codimd-web.codinet:3000
      - CODIMD_INT_URL=https://codimd-web.codinet:3000
      - CODIMD_EXT_URL=${HOST}:3000
    depends_on:
      - frontend
    ports:
      - "8000:8000"
    volumes:
      - logs:/var/log/wopi
    # NOTE(review): the bridge also opens /var/run/secrets/wbsecret and
    # /var/run/secrets/codimd_apikey at startup; neither is declared here -
    # confirm how they are provisioned.
    secrets:
      - cert.pem
      - key.pem
    network_mode: codinet
    restart: always
    healthcheck:
      test: ["CMD", "curl", "--insecure", "https://localhost:8000"]
      interval: 600s
      timeout: 5s
      retries: 3

volumes:
  database-data: {}
  logs: {}

networks:
  codinet: {}

secrets:
  cert.pem:
    file: /etc/grid-security/hostcert.pem
  key.pem:
    file: /etc/grid-security/hostkey.pem
  dhparam.pem:
    file: ./dhparams.pem

263
poc_src/codimd.py Normal file
View File

@ -0,0 +1,263 @@
'''
codimd.py
The CodiMD-specific code used by the WOPI bridge.
Author: Giuseppe.LoPresti@cern.ch, CERN/IT-ST
'''
import os
import re
import zipfile
import io
from random import randint
import json
import hashlib
import urllib.parse as urlparse
import http.client
import requests
import wopiclient as wopi
class AppFailure(Exception):
    '''Raised when a request to CodiMD fails in a way that must abort the current operation'''
# a regexp for uploads, that have links like '/uploads/upload_542a360ddefe1e21ad1b8c85207d9365.*':
# the path prefix, 32 word characters, a dot and an extension
upload_re = re.compile(r'\/uploads\/upload_\w{32}\.\w+')
# Module-level state, initialized by the main class or by the init method.
# URL used for bridge-to-CodiMD requests; init() takes it from CODIMD_URL, defaulting to appexturl
appurl = None
# URL used to build the redirect links returned to users; init() takes it from CODIMD_EXT_URL
appexturl = None
# shared secret sent to CodiMD as the `apiKey` parameter; init() reads it from the codimd_apikey file
apikey = None
# logger object, assigned from outside this module before init() is called
log = None
# when true, requests to CodiMD are issued with verify=False; assigned from outside this module
skipsslverify = None
def init(env, apipath):
    '''Initialize the module-level configuration from the environment.

    env is a mapping of environment variables: CODIMD_EXT_URL is mandatory,
    whereas the internal URL is taken from CODIMD_URL or, failing that, from
    CODIMD_INT_URL (the name exported by the bundled docker-compose file),
    and finally defaults to the external URL.
    apipath is the folder holding the `codimd_apikey` secret file, with or
    without a trailing separator.
    Raises ValueError if CODIMD_EXT_URL is missing or empty, and OSError if
    the API key file cannot be read.'''
    global appurl
    global appexturl
    global apikey
    appexturl = env.get('CODIMD_EXT_URL')
    if not appexturl:
        raise ValueError("Missing CODIMD_EXT_URL env var")
    appurl = env.get('CODIMD_URL') or env.get('CODIMD_INT_URL') or appexturl
    # only the first line of the secret file is the key
    with open(os.path.join(apipath, 'codimd_apikey')) as f:
        apikey = f.readline().strip('\n')
def getredirecturl(isreadwrite, wopisrc, acctok, wopilock, displayname):
    '''Build the URL the user must be redirected to in order to open the note
    identified by wopilock['docid'], in edit or in read-only mode'''
    docid = wopilock['docid']
    if not isreadwrite:
        # read-only mode: target the publish view, except for slides ('mds')
        # where the normal view is used to quickly jump in slide mode
        suffix = '' if wopilock['app'] == 'mds' else '/publish'
        viewpath = docid + suffix
        probe = requests.head(appurl + viewpath,
                              params={'apiKey': apikey},
                              verify=not skipsslverify)
        if probe.status_code != http.client.FOUND:
            return appexturl + viewpath + '?apiKey=' + apikey
        # CodiMD redirected us: expose the last path component of the target under /s/
        return appexturl + '/s/' + urlparse.urlsplit(probe.next.url).path.split('/')[-1]
    # edit mode: the WOPI context travels with the URL as a quoted metadata argument
    metadata = urlparse.quote_plus('%s?t=%s' % (wopisrc, acctok))
    return appexturl + docid + '?metadata=' + metadata + \
        '&apiKey=' + apikey + '&displayName=' + displayname
# Cloud storage to CodiMD
##########################
def _unzipattachments(inputbuf):
'''Unzip the given input buffer uploading the content to CodiMD and return the contained .md file'''
inputzip = zipfile.ZipFile(io.BytesIO(inputbuf), compression=zipfile.ZIP_STORED)
mddoc = None
for zipinfo in inputzip.infolist():
fname = zipinfo.filename
log.debug('msg="Extracting attachment" name="%s"' % fname)
if os.path.splitext(fname)[1] == '.md':
mddoc = inputzip.read(zipinfo)
else:
# first check if the file already exists in CodiMD:
res = requests.head(appurl + '/uploads/' + fname, verify=not skipsslverify)
if res.status_code == http.client.OK and int(res.headers['Content-Length']) == zipinfo.file_size:
# yes (assume that hashed filename AND size matching is a good enough content match!)
log.debug('msg="Skipped existing attachment" filename="%s"' % fname)
continue
# check for collision
if res.status_code == http.client.OK:
log.warning('msg="Attachment collision detected" filename="%s"' % fname)
# append a random letter to the filename
name, ext = os.path.splitext(fname)
fname = name + '_' + chr(randint(65, 65+26)) + ext
# and replace its reference in the document (this creates a copy of the doc, not very efficient)
mddoc = mddoc.replace(zipinfo.filename, fname)
# OK, let's upload
log.debug('msg="Pushing attachment" filename="%s"' % fname)
res = requests.post(appurl + '/uploadimage', params={'generateFilename': 'false'},
files={'image': (fname, inputzip.read(zipinfo))}, verify=not skipsslverify)
if res.status_code != http.client.OK:
log.error('msg="Failed to push included file" filename="%s" httpcode="%d"' % (fname, res.status_code))
return mddoc
def _isslides(doc):
'''Heuristically look for signatures of slides in the header of a md document'''
return doc[:9].decode() == '---\ntitle' or doc[:8].decode() == '---\ntype' or doc[:16].decode() == '---\nslideOptions'
def _fetchfromcodimd(wopilock, acctok):
'''Fetch a given document from from CodiMD, raise AppFailure in case of errors'''
try:
res = requests.get(appurl + wopilock['docid'] + '/download', verify=not skipsslverify)
if res.status_code != http.client.OK:
log.error('msg="Unable to fetch document from CodiMD" token="%s" response="%d: %s"' %
(acctok[-20:], res.status_code, res.content.decode()))
raise AppFailure
return res.content
except requests.exceptions.ConnectionError as e:
log.error('msg="Exception raised attempting to connect to CodiMD" exception="%s"' % e)
raise AppFailure
def loadfromstorage(filemd, wopisrc, acctok, docid):
    '''Copy a document from the storage to CodiMD and return the WOPI lock
    structure generated for it.

    filemd is the file's metadata, of which BaseFileName is used here; wopisrc
    and acctok identify the file on the WOPI server. If docid is empty, the
    document is pushed read-only to a new note with a random id, otherwise it
    is pushed to the given note.
    Raises ValueError (carrying the HTTP status code) if the WOPI GetFile call
    fails, and AppFailure if CodiMD cannot be reached or refuses the document.'''
    # WOPI GetFile
    res = wopi.request(wopisrc, acctok, 'GET', contents=True)
    if res.status_code != http.client.OK:
        raise ValueError(res.status_code)
    mdfile = res.content
    wasbundle = os.path.splitext(filemd['BaseFileName'])[1] == '.zmd'

    # if it's a bundled file, unzip it and push the attachments in the appropriate folder
    # NOTE(review): a bundle without any .md entry yields None here, and the
    # hashing below then fails - confirm whether this needs explicit handling
    if wasbundle:
        mddoc = _unzipattachments(mdfile)
    else:
        mddoc = mdfile
    # compute its SHA1 hash for later checks if the file was modified
    h = hashlib.sha1()
    h.update(mddoc)
    try:
        if not docid:
            # read-only case: push the doc to a newly generated note with a random docid
            res = requests.post(appurl + '/new', data=mddoc,
                                allow_redirects=False,
                                params={'mode': 'locked'},
                                headers={'Content-Type': 'text/markdown'},
                                verify=not skipsslverify)
            if res.status_code != http.client.FOUND:
                log.error('msg="Unable to push read-only document to CodiMD" token="%s" response="%d"' %
                          (acctok[-20:], res.status_code))
                raise AppFailure
            # the new note id is the last path component of the redirect target
            docid = urlparse.urlsplit(res.next.url).path.split('/')[-1]
            log.info('msg="Pushed read-only document to CodiMD" docid="%s" token="%s"' % (docid, acctok[-20:]))
        else:
            # reserve the given docid in CodiMD via a HEAD request
            res = requests.head(appurl + '/' + docid,
                                params={'apiKey': apikey},
                                verify=not skipsslverify)
            if res.status_code not in (http.client.OK, http.client.FOUND):
                log.error('msg="Unable to reserve note hash in CodiMD" token="%s" response="%d"' %
                          (acctok[-20:], res.status_code))
                raise AppFailure
            # check if the target docid is real or is a redirect
            if res.status_code == http.client.FOUND:
                newdocid = urlparse.urlsplit(res.next.url).path.split('/')[-1]
                log.info('msg="Document got aliased in CodiMD" olddocid="%s" docid="%s" token="%s"' %
                         (docid, newdocid, acctok[-20:]))
                docid = newdocid
            else:
                log.debug('msg="Got note hash from CodiMD" docid="%s"' % docid)
            # push the document to CodiMD with the update API
            res = requests.put(appurl + '/api/notes/' + docid,
                               params={'apiKey': apikey},    # possibly required in the future
                               json={'content': mddoc.decode()},
                               verify=not skipsslverify)
            if res.status_code == http.client.FORBIDDEN:
                # the file got unlocked because of no activity, yet some user is there: let it go
                log.warning('msg="Document was being edited in CodiMD, redirecting user" token="%s"' % acctok[-20:])
            elif res.status_code != http.client.OK:
                log.error('msg="Unable to push document to CodiMD" token="%s" response="%d"' %
                          (acctok[-20:], res.status_code))
                raise AppFailure
            log.info('msg="Pushed document to CodiMD" docid="%s" token="%s"' % (docid, acctok[-20:]))
    except requests.exceptions.ConnectionError as e:
        log.error('msg="Exception raised attempting to connect to CodiMD" exception="%s"' % e)
        raise AppFailure
    # generate and return a WOPI lock structure for this document
    return wopi.generatelock(docid, filemd, h.hexdigest(), 'mds' if _isslides(mddoc) else 'md', acctok, False)
# CodiMD to cloud storage
##########################
def _getattachments(mddoc, docfilename, forcezip=False):
'''Parse a markdown file and generate a zip file containing all included files'''
zip_buffer = io.BytesIO()
response = None
for attachment in upload_re.findall(mddoc):
log.debug('msg="Fetching attachment" url="%s"' % attachment)
res = requests.get(appurl + attachment, verify=not skipsslverify)
if res.status_code != http.client.OK:
log.error('msg="Failed to fetch included file, skipping" path="%s" response="%d"' % (
attachment, res.status_code))
# also notify the user
response = wopi.jsonify('Failed to include a referenced picture in the saved file'), http.client.NOT_FOUND
continue
with zipfile.ZipFile(zip_buffer, "a", zipfile.ZIP_STORED, allowZip64=False) as zip_file:
zip_file.writestr(attachment.split('/')[-1], res.content)
if not forcezip and zip_buffer.getbuffer().nbytes == 0:
# no attachments actually found
return None, response
# also include the markdown file itself
with zipfile.ZipFile(zip_buffer, "a", zipfile.ZIP_STORED, allowZip64=False) as zip_file:
zip_file.writestr(docfilename, mddoc)
return zip_buffer.getvalue(), response
def savetostorage(wopisrc, acctok, isclose, wopilock):
    '''Copy the document referenced by the given WOPI lock from CodiMD back to
    the storage, and return a (message, http code) pair describing the outcome.
    isclose tells whether this save follows a close of the document.'''
    shorttok = acctok[-20:]
    # get document from CodiMD
    try:
        log.info('msg="Fetching file from CodiMD" isclose="%s" appurl="%s" token="%s"' %
                 (isclose, appurl + wopilock['docid'], shorttok))
        mddoc = _fetchfromcodimd(wopilock, acctok)
    except AppFailure:
        return wopi.jsonify('Could not save file, failed to fetch document from CodiMD'), \
            http.client.INTERNAL_SERVER_ERROR

    sha = None
    if isclose and wopilock['digest'] != 'dirty':
        # the file was not touched so far: compare the content before forcing a put
        sha = hashlib.sha1()
        sha.update(mddoc)
        if sha.hexdigest() == wopilock['digest']:
            log.info('msg="File unchanged, skipping save" token="%s"' % shorttok)
            return '{}', http.client.ACCEPTED

    # check if we have attachments
    filename = wopilock['filename']
    wasbundle = os.path.splitext(filename)[1] == '.zmd'
    bundlefile, attresponse = _getattachments(mddoc.decode(), filename.replace('.zmd', '.md'),
                                              (wasbundle and not isclose))

    if isclose and wasbundle != bool(bundlefile):
        # on close, use saveas for either the new bundle, if this is the first time we have
        # attachments, or the single md file, if there are no more attachments.
        newext = '.zmd' if bundlefile else '.md'
        return wopi.saveas(wopisrc, acctok, wopilock, os.path.splitext(wopilock['filename'])[0] + newext,
                           bundlefile if bundlefile else mddoc)

    # WOPI PutFile for the file or the bundle if it already existed
    res = wopi.request(wopisrc, acctok, 'POST', headers={'X-WOPI-Lock': json.dumps(wopilock)},
                       contents=(bundlefile if wasbundle else mddoc))
    reply = wopi.handleputfile('PutFile', wopisrc, res)
    if reply:
        return reply
    if isclose and wopilock['digest'] == 'dirty':
        sha = hashlib.sha1()
        sha.update(mddoc)
    # the lock carries the new digest if one was computed, else it is flagged as dirty
    wopilock = wopi.refreshlock(wopisrc, acctok, wopilock, digest=(sha.hexdigest() if sha else 'dirty'))
    log.info('msg="Save completed" filename="%s" isclose="%s" token="%s"' %
             (wopilock['filename'], isclose, shorttok))
    # combine the responses
    if attresponse:
        return attresponse
    return wopi.jsonify('File saved successfully'), http.client.OK

170
poc_src/etherpad.py Normal file
View File

@ -0,0 +1,170 @@
'''
etherpad.py
The Etherpad-specific code used by the WOPI bridge.
Author: Giuseppe.LoPresti@cern.ch, CERN/IT-ST
'''
from random import choice
from string import ascii_lowercase
import json
import hashlib
import http.client
import urllib.parse as urlparse
import requests
import wopiclient as wopi
class AppFailure(Exception):
    '''Raised when a request to Etherpad fails in a way that must abort the current operation'''
# Module-level state, initialized by the main class or by the init method.
# URL used for bridge-to-Etherpad requests; init() takes it from ETHERPAD_URL, defaulting to appexturl
appurl = None
# URL used to build the links returned to users; init() takes it from ETHERPAD_EXT_URL
appexturl = None
# key sent as the `apikey` parameter of the Etherpad API; init() reads it from the etherpad_apikey file
apikey = None
# logger object, assigned from outside this module before init() is called
log = None
# when true, requests to Etherpad are issued with verify=False; assigned from outside this module
skipsslverify = None
# id of the Etherpad group all pads are created in; obtained by init()
groupid = None
def init(env, apipath):
    '''Set up the module-level configuration from the given environment mapping
    and from the `etherpad_apikey` file found under apipath, then get from
    Etherpad the group that all pads are attached to.
    Raises ValueError if ETHERPAD_EXT_URL is missing or empty.'''
    global appurl, appexturl, apikey, groupid
    appexturl = env.get('ETHERPAD_EXT_URL')
    if not appexturl:
        raise ValueError("Missing ETHERPAD_EXT_URL env var")
    # the internal URL defaults to the external one
    appurl = env.get('ETHERPAD_URL') or appexturl
    with open(apipath + 'etherpad_apikey') as keyfile:
        apikey = keyfile.readline().strip('\n')
    # a single general group, mapped to the constant 1, holds all pads
    groupid = _apicall('createGroupIfNotExistsFor', {'groupMapper': 1})
    groupid = groupid['data']['groupID']
    log.info('msg="Got Etherpad global groupid" groupid="%s"' % groupid)
def _apicall(method, params, data=None, acctok=None, raiseonnonzerocode=True):
'''Generic method to call the Etherpad REST API'''
params['apikey'] = apikey
try:
res = requests.post(appurl + '/api/1/' + method, params=params, data=data, verify=not skipsslverify)
if res.status_code != http.client.OK:
log.error('msg="Failed to call Etherpad" method="%s" token="%s" response="%d: %s"' %
(method, acctok[-20:] if acctok else 'N/A', res.status_code, res.content.decode()))
raise AppFailure
except requests.exceptions.ConnectionError as e:
log.error('msg="Exception raised attempting to connect to CodiMD" exception="%s"' % e)
raise AppFailure
res = res.json()
if res['code'] != 0 and raiseonnonzerocode:
log.error('msg="Error response from Etherpad" method="%s" token="%s" response="%s"' %
(method, acctok[-20:] if acctok else 'N/A', res['message']))
raise AppFailure
log.debug('msg="Called Etherpad API" method="%s" token="%s" result="%s"' %
(method, acctok[-20:] if acctok else 'N/A', res))
return res
def getredirecturl(isreadwrite, wopisrc, acctok, wopilock, displayname):
    '''Build the URL the user must be redirected to in order to open the pad
    referenced by the given WOPI lock, in edit or in read-only mode'''
    # the pad id is the docid without its first character
    padid = wopilock['docid'][1:]
    if isreadwrite:
        # URL to the pad (TODO the metadata argument must be picked up by an Etherpad plugin)
        metadata = urlparse.quote_plus('%s?t=%s' % (wopisrc, acctok))
        return appexturl + '/p/%s?userName=%s&metadata=%s' % (padid, displayname, metadata)
    # for read-only mode ask Etherpad for a read-only id and link to it
    res = _apicall('getReadOnlyID', {'padID': padid}, acctok=acctok)
    return appexturl + '/p/' + res['data']['readOnlyID']
# Cloud storage to Etherpad
###########################
def loadfromstorage(filemd, wopisrc, acctok, docid):
    '''Copy a document from the storage to Etherpad and return the WOPI lock
    structure generated for it. If docid is empty, a random pad id is used.
    Raises ValueError (carrying the HTTP status code) if the WOPI GetFile call
    fails, and AppFailure on failures when talking to Etherpad.'''
    shorttok = acctok[-20:]
    # WOPI GetFile
    res = wopi.request(wopisrc, acctok, 'GET', contents=True)
    if res.status_code != http.client.OK:
        raise ValueError(res.status_code)
    epfile = res.content
    # its SHA1 hash is stored in the lock, for later checks if the file was modified
    digest = hashlib.sha1(epfile)
    try:
        if not docid:
            docid = ''.join(choice(ascii_lowercase) for _ in range(20))
            log.debug('msg="Generated random padID for read-only document" docid="%s" token="%s"' % (docid, shorttok))
        # drop any previous pad first, then create one named after the docid:
        # error codes returned by these two calls are tolerated
        _apicall('deletePad', {'padID': docid}, acctok=acctok, raiseonnonzerocode=False)
        _apicall('createGroupPad', {'groupID': groupid, 'padName': docid, 'text': 'placeholder'},
                 acctok=acctok, raiseonnonzerocode=False)
        # push content: a .etherpad file is imported as raw (JSON) content
        upload = {'file': (docid + '.etherpad', epfile)}
        res = requests.post(appurl + '/p/' + docid + '/import',
                            files=upload,
                            params={'apikey': apikey},
                            verify=not skipsslverify)
        if res.status_code != http.client.OK:
            log.error('msg="Unable to push document to Etherpad" token="%s" response="%d: %s"' %
                      (shorttok, res.status_code, res.content.decode()))
            raise AppFailure
        log.info('msg="Pushed document to Etherpad" docid="%s" token="%s"' % (docid, shorttok))
    except requests.exceptions.ConnectionError as e:
        log.error('msg="Exception raised attempting to connect to Etherpad" exception="%s"' % e)
        raise AppFailure
    # generate and return a WOPI lock structure for this document
    return wopi.generatelock(docid, filemd, digest.hexdigest(), None, acctok, False)
# Etherpad to cloud storage
###########################
def _fetchfrometherpad(wopilock, acctok):
'''Fetch a given document from from Etherpad, raise AppFailure in case of errors'''
try:
# this operation does not use the API (and it is NOT protected by the API key!), so we use a plain GET
res = requests.get(appurl + '/p' + wopilock['docid'] + '/export/etherpad',
verify=not skipsslverify)
if res.status_code != http.client.OK:
log.error('msg="Unable to fetch document from Etherpad" token="%s" response="%d: %s"' %
(acctok[-20:], res.status_code, res.content.decode()))
raise AppFailure
return res.content
except requests.exceptions.ConnectionError as e:
log.error('msg="Exception raised attempting to connect to Etherpad" exception="%s"' % e)
raise AppFailure
def savetostorage(wopisrc, acctok, isclose, wopilock):
    '''Copy the document referenced by the given WOPI lock from Etherpad back to
    the storage, and return a (message, http code) pair describing the outcome.
    isclose tells whether this save follows a close of the document.'''
    shorttok = acctok[-20:]
    # get document from Etherpad
    try:
        log.info('msg="Fetching file from Etherpad" isclose="%s" appurl="%s" token="%s"' %
                 (isclose, appurl + '/p' + wopilock['docid'], shorttok))
        epfile = _fetchfrometherpad(wopilock, acctok)
    except AppFailure:
        return wopi.jsonify('Could not save file, failed to fetch document from Etherpad'), \
            http.client.INTERNAL_SERVER_ERROR

    # if the file was not touched so far, compare the content before forcing a put
    untouched = isclose and wopilock['digest'] != 'dirty'
    if untouched and hashlib.sha1(epfile).hexdigest() == wopilock['digest']:
        log.info('msg="File unchanged, skipping save" token="%s"' % shorttok)
        return '{}', http.client.ACCEPTED

    # WOPI PutFile
    res = wopi.request(wopisrc, acctok, 'POST', headers={'X-WOPI-Lock': json.dumps(wopilock)},
                       contents=epfile)
    reply = wopi.handleputfile('PutFile', wopisrc, res)
    if reply:
        return reply
    # after a successful put the lock is always flagged as dirty
    wopilock = wopi.refreshlock(wopisrc, acctok, wopilock, digest='dirty')
    log.info('msg="Save completed" filename="%s" isclose="%s" token="%s"' %
             (wopilock['filename'], isclose, shorttok))
    return wopi.jsonify('File saved successfully'), http.client.OK

473
poc_src/wopibridge.py Executable file
View File

@ -0,0 +1,473 @@
#!/usr/bin/python3
'''
wopibridge.py
The WOPI Bridge for IOP. This connector service supports CodiMD and Etherpad.
Author: Giuseppe.LoPresti@cern.ch, CERN/IT-ST
'''
import os
import sys
import time
import traceback
import socket
from platform import python_version
import logging
import threading
import atexit
import functools
import urllib.parse as urlparse
import http.client
import json
import hashlib
import hmac
from base64 import urlsafe_b64encode
try:
import flask
from werkzeug.exceptions import NotFound as Flask_NotFound
from werkzeug.exceptions import MethodNotAllowed as Flask_MethodNotAllowed
except ImportError:
print("Missing modules, please install with `pip3 install flask requests`")
raise
import wopiclient as wopi
# version string of the bridge; 'git' is the value hardcoded in the sources
WBVERSION = 'git'
# this is the default location of secrets in docker
CERTPATH = '/var/run/secrets/cert.pem'
# path to a secret used to hash noteids and protect the /list endpoint
SECRETPATH = '/var/run/secrets/wbsecret'
# folder holding the APIKEY secrets: it is handed to each plugin's init(),
# which appends its own file name to it
APIKEYPATH = '/var/run/secrets/'
# The supported plugins integrated with this WOPI Bridge, as a map
# file extension -> name of the python module implementing the plugin
BRIDGE_EXT_PLUGINS = {'md': 'codimd', 'zmd': 'codimd', 'mds': 'codimd', 'epd': 'etherpad'}
# a standard message to be displayed by the app when some content might be lost: this would only
# appear in case of uncaught exceptions or bugs handling the webhook callbacks
RECOVER_MSG = 'Please copy the content to a safe place and reopen the document again to paste it back.'
class WB:
'''A singleton container for all state information of the server'''
approot = os.getenv('APP_ROOT', '/wopib') # application root path
bpr = flask.Blueprint('WOPIBridge', __name__, url_prefix=approot)
app = flask.Flask('WOPIBridge')
log = app.logger
port = 8000
skipsslverify = False
loglevels = {"Critical": logging.CRITICAL, # 50
"Error": logging.ERROR, # 40
"Warning": logging.WARNING, # 30
"Info": logging.INFO, # 20
"Debug": logging.DEBUG # 10
}
active = True
# a map of all open documents: wopisrc -> (acctok, tosave, lastsave, toclose)
# where acctok is one of the access tokens for the given doc, and
# toclose is a dict {shorttok -> isclose} with shorttok = 20 last chars of all known tokens
openfiles = {}
# a map of responses: wopisrc -> (http code, message)
saveresponses = {}
# a condition variable to synchronize the save thread and the main Flask threads
savecv = threading.Condition()
# a map file-extension -> application plugin
plugins = {}
@classmethod
def init(cls):
    '''Initialises the application, bails out in case of failures. Note this is not a __init__ method.

    Registers the blueprint, sets up logging to /var/log/wopi/wopibridge.log,
    reads the configuration from the environment (SKIP_SSL_VERIFY,
    APP_SAVE_INTERVAL, APP_UNLOCK_INTERVAL) and the hashing secret from
    SECRETPATH, initializes the plugins and starts the save thread.
    Any failure is logged as fatal and terminates the process with exit code 22.'''
    cls.app.register_blueprint(cls.bpr)
    try:
        # configuration
        loghandler = logging.FileHandler('/var/log/wopi/wopibridge.log')
        loghandler.setFormatter(logging.Formatter(fmt='%(asctime)s %(name)s[%(process)d] %(levelname)-8s %(message)s',
                                                  datefmt='%Y-%m-%dT%H:%M:%S'))
        cls.log.addHandler(loghandler)
        cls.log.setLevel(cls.loglevels['Debug'])
        skipsslverify = os.environ.get('SKIP_SSL_VERIFY')
        if isinstance(skipsslverify, str):
            cls.skipsslverify = skipsslverify.upper() in ('TRUE', 'YES')
        else:
            cls.skipsslverify = False
        # an unset variable makes int(None) raise TypeError and selects the default,
        # whereas a non-numeric value raises ValueError, which is fatal (see below)
        try:
            cls.saveinterval = int(os.environ.get('APP_SAVE_INTERVAL'))
        except TypeError:
            cls.saveinterval = 200
        try:
            # this used to be stored in saveinterval, leaving unlockinterval undefined
            cls.unlockinterval = int(os.environ.get('APP_UNLOCK_INTERVAL'))
        except TypeError:
            cls.unlockinterval = 90
        with open(SECRETPATH) as f:
            cls.hashsecret = f.readline().strip('\n')
        wopi.log = cls.log
        wopi.skipsslverify = cls.skipsslverify
        # init plugins: a plugin that fails to initialize is disabled, not fatal
        for p in set(BRIDGE_EXT_PLUGINS.values()):
            try:
                cls.plugins[p] = __import__(p, globals(), locals())
                cls.plugins[p].log = cls.log
                cls.plugins[p].skipsslverify = cls.skipsslverify
                cls.plugins[p].init(os.environ, APIKEYPATH)
                cls.log.info('msg="Imported plugin for application" app="%s" plugin="%s"' % (p, cls.plugins[p]))
            except Exception as e:    # pylint: disable=broad-except
                cls.log.info('msg="Disabled plugin following failed initialization" app="%s" message="%s"' % (p, e))
                cls.plugins[p] = None
        # at least one plugin must be available
        if not any(plugin is not None for plugin in cls.plugins.values()):
            raise ValueError('None of the available app plugins could be initialized')
        # start the thread to perform async save operations
        cls.savethread = SaveThread()
        cls.savethread.start()
    except Exception as e:    # pylint: disable=broad-except
        # any error we get here with the configuration is fatal
        cls.log.fatal('msg="Failed to initialize the service, aborting" error="%s"' % e)
        sys.exit(22)
@classmethod
def run(cls):
'''Runs the Flask app in secure (standalone) or unsecure mode depending on the context.
Secure https mode typically is to be provided by the infrastructure (k8s ingress, nginx...)'''
if os.path.isfile(CERTPATH):
cls.log.info('msg="WOPI Bridge starting in secure mode" baseUrl="%s" version="%s"' % (cls.approot, WBVERSION))
cls.app.run(host='0.0.0.0', port=cls.port, threaded=True,
ssl_context=(CERTPATH, CERTPATH.replace('cert', 'key')))
else:
cls.log.info('msg="WOPI Bridge starting in unsecure/debugging mode" baseUrl="%s" version="%s"' % (cls.approot, WBVERSION))
cls.app.run(host='0.0.0.0', port=cls.port, threaded=True, debug=True)
def _guireturn(msg):
'''One-liner to better render messages that may be visible in the UI'''
return '<div align="center" style="color:#808080; padding-top:50px; font-family:Verdana">%s</div>' % msg
def _gendocid(wopisrc):
    '''Derive a URL safe document id for the app from the last path component of the wopisrc,
    as an HMAC-SHA1 keyed with the service's hash secret'''
    fileid = wopisrc.split('/')[-1]
    mac = hmac.new(WB.hashsecret.encode(), msg=fileid.encode(), digestmod=hashlib.sha1)
    encoded = urlsafe_b64encode(mac.digest()).decode()
    # the last character is dropped: for a 20-byte digest it is the base64 '=' padding
    return encoded[:-1]
# The Web Application starts here
#############################################################################################################
@WB.app.errorhandler(Exception)
def handleexception(ex):
    '''Catch-all error handler: 404/405 errors are passed through to Flask unchanged,
    anything else is logged with its traceback and turned into a JSON-formatted 500 response'''
    if not isinstance(ex, (Flask_NotFound, Flask_MethodNotAllowed)):
        ex_type, ex_value, ex_traceback = sys.exc_info()
        stack = traceback.format_exception(ex_type, ex_value, ex_traceback)
        WB.log.error('msg="Unexpected exception caught" exception="%s" type="%s" traceback="%s"' %
                     (ex, ex_type, stack))
        body = wopi.jsonify('Internal error, please contact support. %s' % RECOVER_MSG)
        return body, http.client.INTERNAL_SERVER_ERROR
    return ex
@WB.app.route("/", methods=['GET'])
def redir():
    '''Redirect the bare root URL to the index page served under the application root'''
    target = WB.approot + '/'
    return flask.redirect(target)
@WB.bpr.route("/", methods=['GET'])
def index():
    '''Serve a static, user-friendly landing page that reports the bridge version, the host name
    and the Flask and Python versions in use'''
    #WB.log.debug('msg="Accessed index page" client="%s"' % flask.request.remote_addr)
    page = """
<html><head><title>ScienceMesh WOPI Bridge</title></head>
<body>
<div align="center" style="color:#000080; padding-top:50px; font-family:Verdana; size:11">
This is a WOPI HTTP bridge, to be used in conjunction with a WOPI-enabled EFSS.<br>Supports CodiMD and Etherpad.<br>
To use this service, please log in to your EFSS Storage and click on a supported document.</div>
<div style="position: absolute; bottom: 10px; left: 10px; width: 99%%;"><hr>
<i>ScienceMesh WOPI Bridge %s at %s. Powered by Flask %s for Python %s</i>.</div>
</body>
</html>
"""
    # the doubled percent sign in the template is an escaped literal '%'
    versions = (WBVERSION, socket.getfqdn(), flask.__version__, python_version())
    return page % versions
@WB.bpr.route("/open", methods=['GET'])
def appopen():
    '''Open a MD doc by contacting the provided WOPISrc with the given access_token.
    Expects the `WOPISrc` and `access_token` query arguments. Fetches the file's WOPI metadata,
    selects the plugin matching the file extension, loads the document into the app
    (locking it via WOPI when the user can write) and redirects the client to the app URL.
    Returns an HTML message with 400/404/500 in case of missing arguments, unsupported
    file types, invalid WOPI context or app failures.'''
    try:
        wopisrc = urlparse.unquote(flask.request.args['WOPISrc'])
        acctok = flask.request.args['access_token']
        WB.log.info('msg="Open called" client="%s" user-agent="%s" token="%s"' %
                    (flask.request.remote_addr, flask.request.user_agent, acctok[-20:]))
    except KeyError as e:
        WB.log.error('msg="Open: unable to open the file, missing WOPI context" error="%s"' % e)
        return _guireturn('Missing arguments'), http.client.BAD_REQUEST

    # WOPI GetFileInfo
    res = wopi.request(wopisrc, acctok, 'GET')
    if res.status_code != http.client.OK:
        WB.log.warning('msg="Open: unable to fetch file WOPI metadata" response="%d"' % res.status_code)
        return _guireturn('Invalid WOPI context'), http.client.NOT_FOUND
    filemd = res.json()
    appname = BRIDGE_EXT_PLUGINS.get(os.path.splitext(filemd['BaseFileName'])[1][1:])
    # a plugin that failed to initialize is registered as None: treat it like an unsupported
    # type instead of failing later with an AttributeError
    app = WB.plugins.get(appname) if appname else None
    if not app:
        # log the same metadata key that was used above to detect the file type
        WB.log.warning('msg="Open: file type not supported" filename="%s" token="%s"' %
                       (filemd['BaseFileName'], acctok[-20:]))
        return _guireturn('File type not supported'), http.client.BAD_REQUEST
    WB.log.debug('msg="Processing open for supported app" app="%s" plugin="%s"' % (appname, app))

    try:
        # use the 'UserCanWrite' attribute to decide whether the file is to be opened in read-only mode
        if filemd['UserCanWrite']:
            try:
                # was it already being worked on?
                wopilock = wopi.getlock(wopisrc, acctok)
                WB.log.info('msg="Lock already held" lock="%s" token="%s"' % (wopilock, acctok[-20:]))
                # add this token to the list, if not already in
                if acctok[-20:] not in wopilock['toclose']:
                    wopilock = wopi.refreshlock(wopisrc, acctok, wopilock)
            except wopi.InvalidLock as e:
                # getlock() carries the http status code as exception message when the lock is not there
                if str(e) != str(int(http.client.NOT_FOUND)):
                    # lock is invalid/corrupted: force read-only mode
                    WB.log.info('msg="Invalid lock, forcing read-only mode" error="%s" token="%s"' % (e, acctok[-20:]))
                    filemd['UserCanWrite'] = False

                # otherwise, this is the first user opening the file; in both cases, fetch it
                wopilock = app.loadfromstorage(filemd, wopisrc, acctok, _gendocid(wopisrc))
                # and WOPI Lock it
                res = wopi.request(wopisrc, acctok, 'POST', headers={'X-WOPI-Lock': json.dumps(wopilock),
                                                                     'X-Wopi-Override': 'LOCK'})
                if res.status_code != http.client.OK:
                    # failed to lock the file: open in read-only mode
                    WB.log.warning('msg="Failed to lock the file" response="%d" token="%s"' %
                                   (res.status_code, acctok[-20:]))
                    filemd['UserCanWrite'] = False

            # keep track of this open document for the save thread and for statistical purposes
            if wopisrc in WB.openfiles:
                # use the new acctok and the new/current wopilock content
                WB.openfiles[wopisrc]['acctok'] = acctok
                WB.openfiles[wopisrc]['toclose'] = wopilock['toclose']
            else:
                WB.openfiles[wopisrc] = {'acctok': acctok, 'tosave': False,
                                         'lastsave': int(time.time()) - WB.saveinterval,
                                         'toclose': {acctok[-20:]: False},
                                         'docid': wopilock['docid'],
                                        }
            # also clear any potential stale response for this document
            WB.saveresponses.pop(wopisrc, None)
        else:
            # user has no write privileges, just fetch the document and push it to the app on a random docid
            wopilock = app.loadfromstorage(filemd, wopisrc, acctok, None)
    except app.AppFailure:
        # this can be raised by loadfromstorage
        return _guireturn('Unable to load the app, please try again later or contact support'), http.client.INTERNAL_SERVER_ERROR

    # here we append the user browser to the displayName
    # TODO need to review this for production usage, it should actually come from WOPI if configured accordingly
    platform = flask.request.user_agent.platform
    redirecturl = app.getredirecturl(
        filemd['UserCanWrite'], wopisrc, acctok, wopilock,
        urlparse.quote_plus(filemd['UserFriendlyName'] + '@' + (platform[:3] if platform else 'oth')))
    WB.log.info('msg="Redirecting client to the app" redirecturl="%s"' % redirecturl)
    return flask.redirect(redirecturl)
@WB.bpr.route("/save", methods=['POST'])
def appsave():
'''Save a MD doc given its WOPI context, and return a JSON-formatted message. The actual save is asynchronous:
this handler only flags the document as dirty in WB.openfiles and, when appropriate, wakes up the SaveThread.
The WOPI context is read from the X-EFSS-Metadata header, expected as <WOPISrc>?t=<access token>, whereas the
`close` and `id` query arguments carry the close flag and the document id.
Returns the latest response stored for this document if there is one (it is consumed by this call),
otherwise an empty JSON body with 202 Accepted; malformed or missing metadata yields a 500 error.'''
# fetch metadata from request
try:
meta = urlparse.unquote(flask.request.headers['X-EFSS-Metadata'])
# str.index() raises ValueError when the '?t=' separator is missing, which is handled below
wopisrc = meta[:meta.index('?t=')]
acctok = meta[meta.index('?t=')+3:]
isclose = flask.request.args.get('close') == 'true'
docid = flask.request.args.get('id')
WB.log.info('msg="Save: requested action" isclose="%s" docid="%s" wopisrc="%s" token="%s"' %
(isclose, docid, wopisrc, acctok[-20:]))
except (KeyError, ValueError) as e:
WB.log.error('msg="Save: malformed or missing metadata" client="%s" headers="%s" exception="%s" error="%s"' %
(flask.request.remote_addr, flask.request.headers, type(e), e))
return wopi.jsonify('Malformed or missing metadata, could not save. %s' % RECOVER_MSG), http.client.INTERNAL_SERVER_ERROR
# decide whether to notify the save thread: on close, for an unknown document,
# or when the last save is older than the save interval
donotify = isclose or wopisrc not in WB.openfiles or WB.openfiles[wopisrc]['lastsave'] < time.time() - WB.saveinterval
# enqueue the request, it will be processed asynchronously
with WB.savecv:
if wopisrc in WB.openfiles:
WB.openfiles[wopisrc]['tosave'] = True
WB.openfiles[wopisrc]['toclose'][acctok[-20:]] = isclose
else:
WB.log.info('msg="Save: repopulating missing metadata" wopisrc="%s" token="%s"' % (wopisrc, acctok[-20:]))
# lastsave is backdated by one save interval
WB.openfiles[wopisrc] = {'acctok': acctok, 'tosave': True,
'lastsave': int(time.time() - WB.saveinterval),
'toclose': {acctok[-20:]: isclose},
'docid': docid,
}
# if it's the first time we heard about this wopisrc, remove any potential stale response
try:
del WB.saveresponses[wopisrc]
except KeyError:
pass
if donotify:
# note that the save thread stays locked until we release the context, after return!
WB.savecv.notify()
# return latest known state for this document
if wopisrc in WB.saveresponses:
resp = WB.saveresponses[wopisrc]
WB.log.info('msg="Save: returned response" response="%s" token="%s"' % (resp, acctok[-20:]))
# each stored response is delivered once only
del WB.saveresponses[wopisrc]
return resp
WB.log.info('msg="Save: enqueued action" immediate="%s" token="%s"' % (donotify, acctok[-20:]))
return '{}', http.client.ACCEPTED
@WB.bpr.route("/list", methods=['GET'])
def applist():
    '''Return a list of all currently opened files, as a JSON dump of the WB.openfiles map.
    The caller must provide the service secret either as `Authorization: Bearer <secret>` header
    or as `apikey` query argument (added for convenience), otherwise 401 is returned.'''
    bearer = flask.request.headers.get('Authorization')
    apikey = flask.request.args.get('apikey')
    # compare in constant time to not leak the secret through response timing; the values are
    # encoded to bytes first because compare_digest() rejects non-ASCII str arguments
    authorized = any(given is not None and hmac.compare_digest(given.encode(), expected.encode())
                     for given, expected in ((bearer, 'Bearer ' + WB.hashsecret), (apikey, WB.hashsecret)))
    if not authorized:
        WB.log.warning('msg="List: unauthorized access attempt, missing authorization token" '
                       'client="%s"' % flask.request.remote_addr)
        return _guireturn('Client not authorized'), http.client.UNAUTHORIZED
    WB.log.info('msg="List: returning list of open files" client="%s"' % flask.request.remote_addr)
    return flask.Response(json.dumps(WB.openfiles), mimetype='application/json')
#############################################################################################################
def _intersection(boolsd):
'''Given a dictionary of booleans, returns the intersection (AND) of all'''
return functools.reduce(lambda x, y: x and y, list(boolsd.values()))
def _union(boolsd):
'''Given a dictionary of booleans, returns the union (OR) of all'''
return functools.reduce(lambda x, y: x or y, list(boolsd.values()))
class SaveThread(threading.Thread):
    '''Async thread for save operations. It wakes up at most every 60 seconds, or earlier when notified
    through WB.savecv, and for each entry in WB.openfiles it saves dirty documents, force-closes
    idle ones and cleans up the state of closed ones.'''

    def run(self):
        '''Perform all pending save to storage operations, until WB.active is reset'''
        WB.log.info('msg="SaveThread starting"')
        while True:
            with WB.savecv:
                # sleep for one minute or until awaken
                WB.savecv.wait(60)
                if not WB.active:
                    break
                # execute a round of sync to storage; list is needed as entries are eventually deleted from the dict
                for wopisrc, openfile in list(WB.openfiles.items()):
                    try:
                        wopilock = self.savedirty(openfile, wopisrc)
                        wopilock = self.closewhenidle(openfile, wopisrc, wopilock)
                        self.cleanup(openfile, wopisrc, wopilock)
                    except Exception as e:    # pylint: disable=broad-except
                        # a failure on one document must not prevent processing the others
                        ex_type, ex_value, ex_traceback = sys.exc_info()
                        WB.log.error('msg="SaveThread: unexpected exception caught" ex="%s" type="%s" traceback="%s"' %
                                     (e, ex_type, traceback.format_exception(ex_type, ex_value, ex_traceback)))
        WB.log.info('msg="SaveThread terminated, shutting down"')

    def savedirty(self, openfile, wopisrc):
        '''save documents that are dirty for more than `saveinterval` or that are being closed.
        Returns the WOPI lock if it was fetched (or re-acquired) in the process, None otherwise'''
        wopilock = None
        if openfile['tosave'] and (_intersection(openfile['toclose'])
                                   or (openfile['lastsave'] < time.time() - WB.saveinterval)):
            try:
                wopilock = wopi.getlock(wopisrc, openfile['acctok'])
            except wopi.InvalidLock:
                WB.log.info('msg="SaveThread: attempting to relock file" token="%s" docid="%s"' %
                            (openfile['acctok'][-20:], openfile['docid']))
                try:
                    # NOTE(review): the lock is also stored as save response here; it gets replaced below
                    # by the actual response once the save attempt has completed
                    wopilock = WB.saveresponses[wopisrc] = wopi.relock(
                        wopisrc, openfile['acctok'], openfile['docid'], _intersection(openfile['toclose']))
                except wopi.InvalidLock as ile:
                    # even this attempt failed, give up
                    # TODO here we should save the file on a local storage to help later recovery
                    WB.saveresponses[wopisrc] = wopi.jsonify(str(ile)), http.client.INTERNAL_SERVER_ERROR
                    # set some 'fake' metadata, will be automatically cleaned up later
                    openfile['lastsave'] = int(time.time())
                    openfile['tosave'] = False
                    openfile['toclose'] = {'invalid-lock': True}
                    return None
            app = BRIDGE_EXT_PLUGINS.get(wopilock['app'])
            if not app:
                WB.log.error('msg="SaveThread: malformed app attribute in WOPI lock" lock="%s"' % wopilock)
                WB.saveresponses[wopisrc] = wopi.jsonify('Unrecognized app for this file'), http.client.BAD_REQUEST
            else:
                WB.log.info('msg="SaveThread: saving file" token="%s" docid="%s"' %
                            (openfile['acctok'][-20:], openfile['docid']))
                WB.saveresponses[wopisrc] = WB.plugins[app].savetostorage(
                    wopisrc, openfile['acctok'], _intersection(openfile['toclose']), wopilock)
            openfile['lastsave'] = int(time.time())
            openfile['tosave'] = False
        return wopilock

    def closewhenidle(self, openfile, wopisrc, wopilock):
        '''close and unlock documents that are idle for more than 4x the save interval (about 14 minutes by default).
        They will transparently be relocked when/if the session resumes, but we seem to miss some close notifications,
        therefore this also works as a cleanup step'''
        if openfile['lastsave'] < int(time.time()) - 4*WB.saveinterval:
            try:
                wopilock = wopi.getlock(wopisrc, openfile['acctok']) if not wopilock else wopilock
                # this will force a close in the cleanup step
                openfile['toclose'] = {t: True for t in openfile['toclose']}
                WB.log.info('msg="SaveThread: force-closing document" lastsavetime="%s" toclosetokens="%s"' %
                            (openfile['lastsave'], openfile['toclose']))
            except wopi.InvalidLock:
                # lock is gone, just cleanup our metadata
                WB.log.warning('msg="SaveThread: cleaning up metadata, detected missed close event" url="%s"' % wopisrc)
                # pop() rather than del: cleanup() runs right after on the same entry and may drop it as well
                WB.openfiles.pop(wopisrc, None)
        return wopilock

    def cleanup(self, openfile, wopisrc, wopilock):
        '''remove state for closed documents after some time: once all known tokens have closed and
        `unlockinterval` has passed since the last save, the document is unlocked and forgotten'''
        if _union(openfile['toclose']) and not openfile['tosave']:
            # check lock
            try:
                wopilock = wopi.getlock(wopisrc, openfile['acctok']) if not wopilock else wopilock
            except wopi.InvalidLock:
                # nothing to do here, this document may have been closed by another wopibridge
                if openfile['lastsave'] < time.time() - WB.unlockinterval:
                    # yet cleanup only after the unlockinterval time, cf. the InvalidLock handling in savedirty()
                    WB.log.info('msg="SaveThread: cleaning up metadata, file already unlocked" url="%s"' % wopisrc)
                    # the entry may already have been dropped by closewhenidle() in this same round
                    WB.openfiles.pop(wopisrc, None)
                return

            # reconcile list of toclose tokens
            openfile['toclose'] = {t: wopilock['toclose'][t] or (t in openfile['toclose'] and openfile['toclose'][t])
                                   for t in wopilock['toclose']}
            if _intersection(openfile['toclose']):
                if openfile['lastsave'] < int(time.time()) - WB.unlockinterval:
                    # nobody is still on this document and some time has passed, unlock
                    res = wopi.request(wopisrc, openfile['acctok'], 'POST',
                                       headers={'X-WOPI-Lock': json.dumps(wopilock), 'X-Wopi-Override': 'UNLOCK'})
                    if res.status_code != http.client.OK:
                        WB.log.warning('msg="SaveThread: failed to unlock" lastsavetime="%s" token="%s" response="%s"' %
                                       (openfile['lastsave'], openfile['acctok'][-20:], res.status_code))
                    else:
                        WB.log.info('msg="SaveThread: unlocked document" lastsavetime="%s" token="%s"' %
                                    (openfile['lastsave'], openfile['acctok'][-20:]))
                    WB.openfiles.pop(wopisrc, None)
            elif openfile['toclose'] != wopilock['toclose']:
                # some user still on it, refresh lock if the toclose part has changed
                wopi.refreshlock(wopisrc, openfile['acctok'], wopilock, toclose=openfile['toclose'])
@atexit.register
def stopsavethread():
    '''Exit handler: flag the service as no longer active and wake up the SaveThread
    so that it can leave its loop'''
    WB.log.info('msg="Waiting for SaveThread to complete"')
    WB.savecv.acquire()
    try:
        WB.active = False
        WB.savecv.notify()
    finally:
        WB.savecv.release()
#
# Entry point: WB.init() sets up logging, configuration and plugins and starts the background
# sync thread, then WB.run() enters the Flask endless listening loop
#
if __name__ == '__main__':
WB.init()
WB.run()

182
poc_src/wopiclient.py Normal file
View File

@ -0,0 +1,182 @@
'''
wopiclient.py
A set of WOPI functions for the WOPI bridge service.
Author: Giuseppe.LoPresti@cern.ch, CERN/IT-ST
'''
import os
import json
import http.client
import requests
from flask import Response
class InvalidLock(Exception):
    '''Raised when a WOPI lock is missing, malformed or cannot be (re)acquired.
    The exception argument is either an http status code, the underlying error
    or a message meant for the user.'''
# module-level settings, initialized by the main class at startup (WB.init() in wopibridge.py):
# `log` is the logger used by all functions below, and `skipsslverify`, when true,
# disables the TLS certificate verification of the WOPI requests
log = None
skipsslverify = None
def jsonify(msg):
    '''One-liner to consistently json-ify a given message.
    Returns a JSON object string with the `message` and a `delay` attribute, the latter formatted
    as a string with one decimal. The encoding is delegated to the json module, so that quotes,
    backslashes and newlines in the message cannot produce an invalid JSON payload.'''
    # a delay = 0 means the user has to click on it to dismiss it, good for longer messages:
    # this is an extended feature of CodiMD only, TODO have it supported on Etherpad as well
    delay = 0 if len(msg) > 60 else 0.5 + len(msg)/20
    # ensure_ascii=False keeps non-ASCII characters verbatim rather than \u-escaped
    return json.dumps({'message': msg, 'delay': '%.1f' % delay}, ensure_ascii=False)
def request(wopisrc, acctok, method, contents=None, headers=None):
    '''Execute a WOPI request with the given parameters and headers.
    `method` is either GET or POST; when `contents` is given, the request targets the /contents
    endpoint, unless it is a PUT_RELATIVE call. Returns the response of the WOPI server, or an
    empty response with a 500 status code if the server could not be contacted, or None if
    the method is not supported.'''
    try:
        wopiurl = '%s%s' % (wopisrc, ('/contents' if contents is not None and
                                      (not headers or headers.get('X-WOPI-Override') != 'PUT_RELATIVE')
                                      else ''))
        log.debug('msg="Calling WOPI" url="%s" headers="%s" acctok="%s"' %
                  (wopiurl, headers, acctok[-20:]))
        if method == 'GET':
            return requests.get('%s?access_token=%s' % (wopiurl, acctok), verify=not skipsslverify)
        if method == 'POST':
            return requests.post('%s?access_token=%s' % (wopiurl, acctok), verify=not skipsslverify,
                                 headers=headers, data=contents)
    except requests.exceptions.ConnectionError as e:
        # only log the last 20 chars of the token, as everywhere else: the full token is a credential
        log.error('msg="Unable to contact WOPI" wopiurl="%s" acctok="%s" response="%s"' % (wopiurl, acctok[-20:], e))
        res = Response()
        res.status_code = http.client.INTERNAL_SERVER_ERROR
        return res
    return None
def generatelock(docid, filemd, digest, app, acctok, isclose):
    '''Build the dict to be used as WOPI lock, in the format { docid, filename, digest, app, toclose },
    where toclose maps the last 20 characters of the access token to the isclose flag, like in the
    openfiles map. When no app is given, the file extension is used in its place.'''
    normdocid = '/' + docid.strip('/')
    filename = filemd['BaseFileName']
    if app:
        appname = app
    else:
        appname = os.path.splitext(filemd['BaseFileName'])[1][1:]
    shorttok = acctok[-20:]
    return {
        'docid': normdocid,
        'filename': filename,
        'digest': digest,
        'app': appname,
        'toclose': {shorttok: isclose},
    }
def getlock(wopisrc, acctok):
    '''Return the currently held WOPI lock, or raise InvalidLock otherwise.
    When the WOPI server replies with an error, the raised InvalidLock carries the http status code;
    when the lock is missing from the response or is not a JSON dict, it carries the underlying error.'''
    try:
        res = request(wopisrc, acctok, 'POST', headers={'X-Wopi-Override': 'GET_LOCK'})
        if res.status_code != http.client.OK:
            # lock got lost or any other error
            raise InvalidLock(res.status_code)
        # the lock is expected to be a JSON dict, see generatelock()
        wopilock = json.loads(res.headers.get('X-WOPI-Lock'))
        if not isinstance(wopilock, dict):
            # valid JSON but not one of our locks
            raise ValueError('WOPI lock is not a JSON dictionary')
        return wopilock
    except (TypeError, ValueError, KeyError, json.decoder.JSONDecodeError) as e:
        # TypeError is what json.loads() raises when the X-WOPI-Lock header is missing altogether
        log.warning('msg="Missing or malformed WOPI lock" exception="%s" error="%s"' % (type(e), e))
        raise InvalidLock(e) from e
def refreshlock(wopisrc, acctok, wopilock, digest=None, toclose=None):
    '''Refresh an existing WOPI lock, optionally replacing its `toclose` dict and its digest.
    Returns the new lock if successful, None otherwise. On a conflict, the current lock is
    fetched again (which may raise InvalidLock) and the refresh is retried on top of it.'''
    shorttok = acctok[-20:]
    # this is a hack for a deep copy, to be redone in Go
    updated = json.loads(json.dumps(wopilock))
    if toclose:
        # we got the full 'toclose' dict, push it as is
        updated['toclose'] = toclose
    elif shorttok not in wopilock['toclose']:
        # if missing, just append the short token to the 'toclose' dict, similarly to the openfiles map
        updated['toclose'][shorttok] = False
    if digest and wopilock['digest'] != digest:
        updated['digest'] = digest

    res = request(wopisrc, acctok, 'POST', headers={
        'X-Wopi-Override': 'REFRESH_LOCK',
        'X-WOPI-OldLock': json.dumps(wopilock),
        'X-WOPI-Lock': json.dumps(updated)
    })
    if res.status_code == http.client.OK:
        return updated
    if res.status_code != http.client.CONFLICT:
        log.error('msg="Calling WOPI RefreshLock failed" url="%s" response="%d" reason="%s"' %
                  (wopisrc, res.status_code, res.headers.get('X-WOPI-LockFailureReason')))
        return None

    # we have a race condition, another thread has updated the lock before us
    log.warning('msg="Got conflict in refreshing lock, retrying" url="%s"' % wopisrc)
    latest = getlock(wopisrc, acctok)
    if toclose:
        # merge toclose token lists, in place in the dict given by the caller
        toclose.update({tok: isclosed or (tok in toclose and toclose[tok])
                        for tok, isclosed in latest['toclose'].items()})
    # recursively retry, the recursion is going to stop in one round
    return refreshlock(wopisrc, acctok, latest, digest, toclose)
def relock(wopisrc, acctok, docid, isclose):
    '''Lock again a document whose lock was lost and return the new WOPI lock (cf. SaveThread).
    Raises InvalidLock with a message for the user if the file metadata cannot be fetched,
    if the file was modified in the meantime or if the lock request fails.'''
    shorttok = acctok[-20:]
    # first get again the file metadata
    mdres = request(wopisrc, acctok, 'GET')
    if mdres.status_code != http.client.OK:
        log.warning('msg="Session expired or file renamed when attempting to relock it" response="%d" token="%s"' %
                    (mdres.status_code, shorttok))
        raise InvalidLock('Session expired, please refresh this page')

    # lock the file again: we assume we are alone as the previous lock had been released
    newlock = generatelock(docid, mdres.json(), 'dirty', None, acctok, isclose)
    lockres = request(wopisrc, acctok, 'POST', headers={
        'X-WOPI-Lock': json.dumps(newlock),
        'X-WOPI-Override': 'REFRESH_LOCK',
        'X-WOPI-Validate-Target': 'True'    # this is an extension of the Lock API
    })
    if lockres.status_code == http.client.OK:
        # relock was successful, return lock: along with noteids univocally associated to files (WOPISrc's),
        # we are sure no other updates could have been missed
        return newlock

    if lockres.status_code == http.client.CONFLICT:
        log.warning('msg="Got conflict in relocking the file" response="%d" token="%s" reason="%s"' %
                    (lockres.status_code, shorttok, lockres.headers.get('X-WOPI-LockFailureReason')))
        raise InvalidLock('The file was modified externally, please refresh this page to get its current version')
    log.warning('msg="Failed to relock the file" response="%d" token="%s" reason="%s"' %
                (lockres.status_code, shorttok, lockres.headers.get('X-WOPI-LockFailureReason')))
    raise InvalidLock('Failed to relock the file on save, please refresh this page')
def handleputfile(wopicall, wopisrc, res):
    '''Inspect the response to a PutFile/PutRelative request: returns None when the call succeeded,
    otherwise logs the failure and returns a (JSON message, http status code) tuple to be handed
    back to the app'''
    status = res.status_code
    if status == http.client.OK:
        return None
    if status == http.client.CONFLICT:
        reason = res.headers.get('X-WOPI-LockFailureReason')
        log.warning('msg="Conflict when calling WOPI %s" url="%s" reason="%s"' %
                    (wopicall, wopisrc, reason))
        return jsonify('Error saving the file. %s' % reason), http.client.INTERNAL_SERVER_ERROR
    log.error('msg="Calling WOPI %s failed" url="%s" response="%s"' % (wopicall, wopisrc, status))
    # TODO need to save the file on a local storage for later recovery
    return jsonify('Error saving the file, please contact support'), http.client.INTERNAL_SERVER_ERROR
def saveas(wopisrc, acctok, wopilock, targetname, content):
    '''Save a given document with an alternate name by using WOPI PutRelative, then unlock and
    delete the original file. Returns a (JSON message, http status code) tuple: failures to unlock
    or delete the original file are only logged and do not affect the outcome.'''
    putrelheaders = {'X-WOPI-Lock': json.dumps(wopilock),
                     'X-WOPI-Override': 'PUT_RELATIVE',
                     # SuggestedTarget to not overwrite a possibly existing file
                     'X-WOPI-SuggestedTarget': targetname
                    }
    res = request(wopisrc, acctok, 'POST', headers=putrelheaders, contents=content)
    reply = handleputfile('PutRelative', wopisrc, res)
    if reply:
        return reply

    # use the new file's metadata from PutRelative to remove the previous file: we can do that only on close
    # because we need to keep using the current wopisrc/acctok until the session is alive in the app
    newname = res.json()['Name']
    # unlock and delete original file
    res = request(wopisrc, acctok, 'POST', headers={'X-WOPI-Lock': json.dumps(wopilock), 'X-Wopi-Override': 'UNLOCK'})
    if res.status_code != http.client.OK:
        log.warning('msg="Failed to unlock the previous file" token="%s" response="%d"' %
                    (acctok[-20:], res.status_code))
    else:
        res = request(wopisrc, acctok, 'POST', headers={'X-Wopi-Override': 'DELETE'})
        if res.status_code != http.client.OK:
            log.warning('msg="Failed to delete the previous file" token="%s" response="%d"' %
                        (acctok[-20:], res.status_code))
        else:
            log.info('msg="Previous file unlocked and removed successfully" token="%s"' % acctok[-20:])

    # the '=' was missing after `filename`, which broke the key="value" format of the log line
    log.info('msg="Final save completed" filename="%s" token="%s"' % (newname, acctok[-20:]))
    return jsonify('File saved successfully'), http.client.OK

19
wopibridge-poc.Dockerfile Normal file
View File

@ -0,0 +1,19 @@
# Dockerfile for the WOPI Bridge PoC
#
# Build: docker-compose -f wopibridge.yaml build --build-arg VERSION=`git describe | sed 's/^v//'` wopibridge

# use a released interpreter rather than an alpha pre-release
FROM python:3.10-slim

ARG VERSION=latest

LABEL maintainer="cernbox-admins@cern.ch" \
  org.opencontainers.image.title="The ScienceMesh IOP WOPI bridge" \
  org.opencontainers.image.version="$VERSION"

# --no-cache-dir keeps the pip download cache out of the image layers
RUN pip install --no-cache-dir --upgrade pip setuptools && \
    pip install --no-cache-dir flask requests

RUN mkdir -p /var/log/wopi /app

# COPY is the plain file copy: ADD's extra features (remote URLs, archive extraction) are not needed
COPY poc_src/* /app/

# stamp the version given at build time into the source
RUN sed -i "s/WBVERSION = 'git'/WBVERSION = '$VERSION'/" /app/wopibridge.py

CMD ["python3", "/app/wopibridge.py"]

40
wopibridge.yaml Normal file
View File

@ -0,0 +1,40 @@
# docker-compose configuration file for the WOPI bridge
#
# Build with: docker-compose -f wopibridge.yaml build --build-arg VERSION=`git describe | sed 's/^v//'` wopibridge
# Run with: CODIMD_HOST=http://your-codimd-host CODIMDPROXY_URL=https://your-ingress/codimd docker-compose -f wopibridge.yaml up -d
---
version: "3.3"

services:
  wopibridge:
    build:
      context: .
      dockerfile: wopibridge-poc.Dockerfile
    image: wopibridge-poc:cern
    container_name: wopibridge
    hostname: cbox-iop-wopibridge
    network_mode: "bridge"
    restart: always
    ports:
      # quoted, so that the mapping can never be parsed as a number
      - "8000:8000"
    environment:
      - CODIMD_INT_URL=${CODIMD_HOST}:3000
      - CODIMD_EXT_URL=${CODIMDPROXY_URL}
    volumes:
      - logs:/var/log/wopi
    secrets:
      - wbsecret
      - codimd_apikey
    healthcheck:
      # NOTE(review): this assumes curl is available in the image and that the service runs in
      # https mode, i.e. that a certificate is mounted under /var/run/secrets - confirm both
      test: ["CMD", "curl", "--insecure", "https://localhost:8000"]
      interval: 600s
      timeout: 5s
      retries: 3

secrets:
  wbsecret:
    file: /etc/wopi/wbsecret
  codimd_apikey:
    file: /etc/wopi/codimd_apikey

volumes:
  logs: {}