1.背景介绍
 
       在AWS平台下,利用S3存储桶自动触发Lambda完成PDF缩略图的生成。使用AWS CDK 完成应用的全栈部署。
 
       文中使用AWS海外帐号进行演示,并在最后对此示例在AWS中国区域的差异进行补充说明。也可直接下载文中提供的源代码在AWS海外、中国区进行自动化部署。
 
       1.1AWS CDK
 
       预置云应用程序是一个具有挑战性的过程,您需要执行手工操作、编写自定义脚本、维护模板或学习特定领域的语言。
 
       AWS 云开发工具包 (AWS CDK) 是一种开源软件开发框架,可使用熟悉的编程语言(目前支持TypeScript、JavaScript、Python、C# 和 Java)对云应用程序资源进行建模和预置。AWS CDK 通过 AWS CloudFormation 以安全、可重复的方式预置AWS资源,使开发人员能够以更加友好的方式预置AWS应用。
 
       1.2Ghostscript
 
       Ghostscript是一套基于Adobe PostScript及可移植文档格式(PDF)页面描述语言的解释器,是以GNU Affero GPL许可发布的自由软件。本文在Lambda中使用Ghostscript完成PDF到缩略图的转换工作。
 
       2.架构介绍
 
       为PDF文件生成缩略图的流程:
 
       
 
       用户上传PDF文件到S3存储桶,事件触发Lambda进行格式转换工作,并将缩略图保存到S3存储桶。
 
       3.从零开始构建应用程序及其部署
 
       示例使用Amazon Linux 2 AMI启动的EC2实例作为编程、部署环境。
 
       3.1[可选]构建支持Ghostscript的Lambda运行环境所需的Layer包
 
       下面会使用AWS Lambda Layer,将Ghostscript添加到Lambda的运行环境之中。
 
        
        $ sudo su -
# yum install git docker -y
# systemctl start docker
# vi make
 
         
       添加内容:
 
        
        .ONESHELL:
# Run every recipe in a single shell so that `cd` carries over to the
# following lines of the same recipe.

# Download the Ghostscript 9.27 source tarball into the current directory.
init:
	curl -OL https://github.com/ArtifexSoftware/ghostpdl-downloads/releases/download/gs927/ghostscript-9.27.tar.gz

# Entry point used by `make -f ../make init all`.
all: gs

# Unpack, configure, build and install Ghostscript under /opt/.
gs /opt/bin/gs: ghostscript-9.27.tar.gz
	tar -zxf $<
	cd ghostscript-9.27
	./configure \
		--without-luratech \
		--prefix=/opt/
	make all
	make install
 
         
        
       开始构建Ghostscript的Lambda运行环境
 
        
        # docker run -it --rm -w=/var/task/build -v $PWD:/var/task -v $PWD:/opt/ --entrypoint /usr/bin/make -t lambci/lambda-base-2:build  -f ../make init all
# zip -r gs.zip bin share
 
         
       构建中使用lambci/lambda-base-2:build为基础镜像,此镜像提供了AWS Lambda运行的沙盒本地环境,在此环境中可以build各种Lambda自定义运行环境。本文构建ghostscript运行环境,并输出gs.zip包,在后面的AWS CDK自动部署中,将使用此包发布AWS Lambda Layer,为Lambda提供Ghostscript运行环境。
 
       3.2安装nodejs
 
        
        # curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.34.0/install.sh | bash
# . ~/.nvm/nvm.sh
# nvm install node
# node --version
 
         
       3.3安装 AWS-CDK
 
        
        # npm install -g aws-cdk
# cdk --version
 
         
       3.4构建AWS CDK应用程序
 
       # mkdir thumbnail && cd thumbnail/
 
        
        
        # cdk init --language javascript
 
         
        
        
        # npm i @aws-cdk/aws-iam @aws-cdk/aws-lambda @aws-cdk/aws-lambda-event-sources @aws-cdk/aws-logs @aws-cdk/aws-s3 @aws-cdk/aws-s3-notifications @aws-cdk/core path
 
         
        
        
        # mkdir tmp
# cp ../gs.zip ./tmp/
 
         
        
        
        # mkdir pdf2jpg-lambda
# vi pdf2jpg-lambda/child-process-promise.js
 
         
        
        /*global module, require, console, Promise */
'use strict';
const childProcess = require('child_process'),
    spawnPromise = function (command, argsarray, envOptions) {
        return new Promise((resolve, reject) => {
            console.log('executing', command, argsarray.join(' '));
            const childProc = childProcess.spawn(command, argsarray, envOptions || { env: process.env, cwd: process.cwd() }),
                resultBuffers = [];
            childProc.stdout.on('data', buffer => {
                console.log(buffer.toString());
                resultBuffers.push(buffer);
            });
            childProc.stderr.on('data', buffer => console.error(buffer.toString()));
            childProc.on('exit', (code, signal) => {
                console.log(`${command} completed with ${code}:${signal}`);
                if (code || signal) {
                    reject(`${command} failed with ${code || signal}`);
                } else {
                    resolve(Buffer.concat(resultBuffers).toString().trim());
                }
            });
        });
    };
module.exports = {
    spawn: spawnPromise
};
 
         
        
        # vi pdf2jpg-lambda/s3-util.js
 
         
        
        /*global module, require, Promise, console */
const aws = require('aws-sdk'),
    fs = require('fs'),
    s3 = new aws.S3(),
    downloadFileFromS3 = function (bucket, fileKey, filePath) {
        'use strict';
        console.log('downloading', bucket, fileKey, filePath);
        return new Promise(function (resolve, reject) {
            const file = fs.createWriteStream(filePath),
                stream = s3.getObject({
                    Bucket: bucket,
                    Key: fileKey
                }).createReadStream();
            stream.on('error', reject);
            file.on('error', reject);
            file.on('finish', function () {
                console.log('downloaded', bucket, fileKey);
                resolve(filePath);
            });
            stream.pipe(file);
        });
    }, uploadFileToS3 = function (bucket, fileKey, filePath, contentType) {
        'use strict';
        console.log('uploading', bucket, fileKey, filePath);
        return s3.upload({
            Bucket: bucket,
            Key: fileKey,
            Body: fs.createReadStream(filePath),
            ACL: 'private',
            ContentType: contentType
        }).promise();
    };
module.exports = {
    downloadFileFromS3: downloadFileFromS3,
    uploadFileToS3: uploadFileToS3
};
 
         
        
 
        
        # vi pdf2jpg-lambda/index.js
 
         
        
        const s3Util = require('./s3-util'),
childProcessPromise = require('./child-process-promise'),
path = require('path'),
os = require('os'),
EXTENSION = process.env.EXTENSION,
OUTPUT_BUCKET = process.env.OUTPUT_BUCKET,
MIME_TYPE =  process.env.MIME_TYPE;
exports.handler = function (eventObject, context) {
    console.log('eventObject', eventObject);;
    const eventRecord = eventObject.Records && eventObject.Records[0],
        inputBucket = eventRecord.s3.bucket.name,
        key = eventRecord.s3.object.key,
        id = context.awsRequestId,
        resultKey = 'thumbnail/' + key.replace(/\.[^.]+$/, EXTENSION),
        workdir = os.tmpdir(),
        inputFile = path.join(workdir,  id + path.extname(key)),
        outputFile = path.join(workdir, 'converted-' + id + EXTENSION);
    console.log('converting', inputBucket, key, 'using', inputFile);
    return s3Util.downloadFileFromS3(inputBucket, key, inputFile)
        .then(() => childProcessPromise.spawn(
            '/opt/bin/gs',
            ['-sDEVICE=jpeggray','-dSAFER','-dBATCH', '-dNOPAUSE','-sPageList=1','-o', outputFile, inputFile],
            {env: process.env, cwd: workdir}
        ))
        .then(() => s3Util.uploadFileToS3(OUTPUT_BUCKET, resultKey, outputFile, MIME_TYPE));
};
 
         
       Ghostscript 常用参数说明, https://www.ghostscript.com/doc/current/Use.htm:
 
        
 
        
         
          
          | DEVICE | Ghostscript 输出的格式 | 
 
          
          | SAFER | 启用对文件的访问控制 | 
 
          
          | BATCH | 执行结束后退出 | 
 
          
          | NOPAUSE | 禁用提示和暂停 | 
 
          
          | PageList | 要转换的页码(本文取值为1,即只转换第1页) | 
 
         
       
 
        
 
        
        
        #vi lib/thumbnail-stack.js
 
         
        
        const cdk = require('@aws-cdk/core');
const s3 = require('@aws-cdk/aws-s3');
const lambda = require('@aws-cdk/aws-lambda');
const iam = require('@aws-cdk/aws-iam')
const path = require('path');
const s3n = require('@aws-cdk/aws-s3-notifications');
class ThumbnailStack extends cdk.Stack {
  /**
   * Defines an S3 bucket, a Lambda layer carrying Ghostscript, an IAM role and
   * a Lambda function that is notified whenever an object with the `.pdf`
   * suffix is created in the bucket.
   *
   * @param {cdk.Construct} scope
   * @param {string} id
   * @param {cdk.StackProps=} props
   */
  constructor(scope, id, props) {
    super(scope, id, props);
    // Bucket that holds both the uploaded PDFs and the generated thumbnails
    const imgBucket = new s3.Bucket(this, 'img-bucket', {
    });
    // Ghostscript runtime packaged as a Lambda layer
    const layer = new lambda.LayerVersion(this, 'ghostscript-layer', {
        code: lambda.Code.fromAsset(path.join(__dirname, '../tmp/gs.zip')),
        compatibleRuntimes: [lambda.Runtime.NODEJS_10_X],
        // The layer ships Ghostscript, which is distributed under the GNU Affero GPL,
        // not Apache-2.0.
        license: 'AGPL-3.0',
        description: 'A layer to host ghostscript',
    });
    // Execution role assumed by the Lambda service
    const pdf2jpgLambdaRole = new iam.Role(this, 'pdf2jpg-lambda-Role', {
        assumedBy: new iam.ServicePrincipal('lambda.amazonaws.com')
    });
    // Allow reading and writing objects in the image bucket
    pdf2jpgLambdaRole.addToPolicy(new iam.PolicyStatement({
        resources: [imgBucket.bucketArn + '/*'],
        actions: ['s3:PutObject', 's3:GetObject']
    }));
    // Allow the function to create its log group/stream and write log events
    pdf2jpgLambdaRole.addToPolicy(new iam.PolicyStatement({
        resources: ['*'],
        actions: ['logs:CreateLogGroup', 'logs:CreateLogStream', 'logs:PutLogEvents']
    }));
    // Conversion function; the asset path is resolved relative to this file (like
    // the layer above) so synthesis does not depend on the current directory.
    const pdf2jpgLambda = new lambda.Function(this, 'pdf2jpg-lambda', {
        code: lambda.Code.fromAsset(path.join(__dirname, '../pdf2jpg-lambda')),
        handler: 'index.handler',
        runtime: lambda.Runtime.NODEJS_10_X,
        layers: [layer],
        timeout: cdk.Duration.minutes(5),
        role: pdf2jpgLambdaRole,
        memorySize: 512,
        environment: {
            EXTENSION: '.jpg',
            MIME_TYPE: 'image/jpeg',
            OUTPUT_BUCKET: imgBucket.bucketName
          }
    });
    // Invoke the function for every newly created object whose key ends in .pdf
    imgBucket.addEventNotification(s3.EventType.OBJECT_CREATED, new s3n.LambdaDestination(pdf2jpgLambda), { suffix: '.pdf' });
  }
}
module.exports = { ThumbnailStack }
 
         
        
        - 设置AWS CDK 运行时需要的ACCESS KEY 和SECRET KEY等信息。
# aws configure
 
       
 
        
        
        # cdk bootstrap
# cdk deploy
 
         
       
 
       3.5验证
 
       在CloudFormation中,可以看到所有创建的资源:
 
       
 
       
 
       在AWS CDK创建的S3存储桶中上传pdf,自动生成缩略图
 
       
 
       3.6删除所有实验资源

        # cdk destroy

       注意:CDK创建的S3存储桶默认会被保留,如需删除请先清空存储桶后手动删除。
 
        
       4.自动化完成全栈部署
 
       [AWS海外]使用Amazon Linux 2 AMI启动的EC2实例作为部署环境,下载并解压附件
 
        
        
        $ sudo su -
# unzip thumbnail_ww.zip
# bash ./installEnv.sh
# source ~/.bash_profile
# npm install && cdk bootstrap && cdk deploy
 
         
       5.AWS中国区部署
 
       [AWS中国区]使用Amazon Linux 2 AMI启动的EC2实例作为部署环境,下载并解压附件
 
        
        
        $ sudo su -
# unzip thumbnail_cn.zip
# bash ./installEnv.sh
# source ~/.bash_profile
# npm install && cdk bootstrap && cdk deploy
 
         
       截至当前(2019年10月),AWS中国区Lambda暂不支持环境变量,部署之后需修改已部署的Lambda函数中读取环境变量的代码:
 
       
 
       修改为:
 
       EXTENSION = '.jpg',
OUTPUT_BUCKET = 's3存储桶名(在cloudformation的资源中获得)',
MIME_TYPE =  'image/jpeg';
 
       5.1中国区部署差异说明
 
        
        - 在installEnv.sh 中,为npm, Dockerhub 设置中国加速,
 
        npm config set registry http://r.cnpmjs.org
cat << EOF > /etc/docker/daemon.json
{
"registry-mirrors": ["http://registry.docker-cn.com"]
}
EOF
 
         
        
        - 在lib/thumbnail-stack.js中去除Lambda暂不支持的环境变量

 
       本篇作者